In [None]:
#default_exp slideslive

In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
#hide
from fastcore.test import test_fail, test_warns
from nbdev.showdoc import show_doc
from nbdev.export import notebook2script

In [None]:
#hide
#test
from myslideslive.test_helper import _cd_temp

In [None]:
#hide
#export
import json
import os
import re
import requests
import shlex
import tempfile
import time
import urllib
import urllib.parse
import warnings

from lxml.etree import HTML
from xml.etree import ElementTree

# Interact with SlidesLive

> This module implements easy interaction with [SlidesLive](https://slideslive.com/).

In [None]:
#hide
#export
# Regular expression for SlidesLive URLs: accepts http(s) on the .com and .de
# domains, captures the numeric presentation ID (`id`) and, after any number
# of slashes, the remainder of the URL (`name`, possibly empty).
_SL_REGEX_STR = ('https?://slideslive\\.(?:com|de)/'
                 '(?P<id>\\d+)'
                 '/*'
                 '(?P<name>.*)')
SL_REGEX = re.compile(_SL_REGEX_STR)

# URL template of the presentation info endpoint; takes the presentation `id`
# and the player `token` (see `get_sl_info`, which handles both JSON and
# `#EXTM3U`-style responses).
SL_INFO = 'https://ben.slideslive.com/player/{id}?player_token={token}'
# URL template of the presentation's HTML page (source of the player token).
SL_HTML = 'https://slideslive.com/{id}'

# URL templates of the slide CDNs.
SL_CDN = 'https://cdn.slideslive.com/data/presentations/{video_id}/slides/{slide_type}/{slide_id}.jpg'
YODA_CDN = 'https://d2ygwrecguqg66.cloudfront.net/data/presentations/{id}/{data}'
# RS_CDN query parameters: `f` is the file format (webp or png) and `h` is the
# image height (432, 540, 720 or 1080).
RS_CDN = 'https://rs.slideslive.com/{video_id}/slides/{slide_id}.{format}?h={slide_type}&f={format}'  # `format` is png or webp; `slide_type` is filled with the height (see SIZE_MAP)
# Examples of YODA_CDN URLs:
#       https://d2ygwrecguqg66.cloudfront.net/data/presentations/38956531/slides/big/00793.jpg
#       https://d2ygwrecguqg66.cloudfront.net/data/presentations/38956531/v1/38956531.xml
#       https://d2ygwrecguqg66.cloudfront.net/data/presentations/38956531/v1/slides.json

# Maps the user-facing slide size name to the image height requested from RS_CDN.
SIZE_MAP = {'small':432, 'medium':540, 'large':720, 'xlarge':1080}

In [None]:
#hide
#test
_url_1 = 'https://slideslive.com/38956531/beyond-static-papers-rethinking-how-we-share-scientific-understanding-in-ml'
_url_2 = 'https://slideslive.com/38956531/'
_url_3 = 'https://slideslive.de/38956531'
_id = '38956531'
_name = 'beyond-static-papers-rethinking-how-we-share-scientific-understanding-in-ml'

In [None]:
#hide
#test
_url_1_match = SL_REGEX.search(_url_1)
assert _url_1_match is not None
assert _url_1_match.group('id') == _id
assert _url_1_match.group('name') == _name

_url_2_match = SL_REGEX.search(_url_2)
assert _url_2_match is not None
assert _url_2_match.group('id') == _id
assert _url_2_match.group('name') == ''


_url_3_match = SL_REGEX.search(_url_3)
assert _url_3_match is not None
assert _url_3_match.group('id') == _id
assert _url_3_match.group('name') == ''

In [None]:
#hide
#test
_ben_url = 'https://ben.slideslive.com/player/123?player_token=456'
assert SL_INFO.format(id=123, token=456) == _ben_url
assert SL_INFO.format(id='123', token='456') == _ben_url

In [None]:
#export
def url2id(sl_url):
    """
    Converts a SlidesLive URL to the presentation ID and name.

    `sl_url` is searched for a SlidesLive address (`slideslive.com` or
    `slideslive.de`); the function returns a `(id, name)` tuple of strings,
    where `name` is empty if the URL ends right after the ID.

    Raises `ValueError` if no SlidesLive address can be found in `sl_url`.
    """
    sl_url_match = SL_REGEX.search(sl_url)
    # The `id` group is `\d+`, so it is never empty once the pattern matched;
    # checking for a missing match is sufficient.
    if sl_url_match is None:
        raise ValueError('Could not parse the SlidesLive URL.')

    return sl_url_match.group('id'), sl_url_match.group('name')

In [None]:
#hide
#test
test_fail(url2id,
          args=['incorrect.url'],
          contains='Could not parse the SlidesLive URL.')

assert url2id(_url_1) == (_id, _name)
assert url2id(_url_2) == (_id, '')
assert url2id(_url_3) == (_id, '')

This function parses a SlidesLive URL into the presentation ID and name.

In [None]:
sl_url = ('https://slideslive.com/38956531/'
          'beyond-static-papers-rethinking-how-we-share-scientific-understanding-in-ml')
my_sl_id, my_sl_name = url2id(sl_url)

my_sl_id, my_sl_name

('38956531',
 'beyond-static-papers-rethinking-how-we-share-scientific-understanding-in-ml')

In [None]:
#export
def get_sl_info(sl_id):
    """
    Pulls information about a SlidesLive presentation.

    `sl_id` is the presentation ID given either as an integer or as a string
    of decimal digits.

    The player token is scraped from the presentation's HTML page and then
    used to query the info endpoint. The endpoint's response is returned as
    a dictionary: either the decoded JSON document or, for `#EXTM3U`-style
    responses, the `KEY:value` lines with the `#EXT-SL-...` prefixes stripped
    and the keys lower-cased (all values are strings in this case).

    Raises `TypeError` for an ID that is neither an integer nor a decimal
    string, and `RuntimeError` if the player token cannot be found or the
    `#EXTM3U` response is malformed.
    """
    # Accept only integers and decimal strings; anything else (None, floats,
    # arbitrary objects) would otherwise end up formatted into the URL.
    is_valid_id = (isinstance(sl_id, int)
                   or (isinstance(sl_id, str) and sl_id.isdecimal()))
    if not is_valid_id:
        raise TypeError('Incorrect SlidesLive ID format.')

    # get player token
    html_source_url = SL_HTML.format(id=sl_id)
    html_source_request = requests.get(html_source_url)
    html_source = HTML(html_source_request.content.decode())
    sl_token = html_source.xpath('//div[@data-player-token]/@data-player-token')
    if not isinstance(sl_token, list) or len(sl_token) != 1:
        raise RuntimeError('Could not retrieve the data player token. '
                           'Please report this error.')
    sl_token = sl_token[0]

    info_url = SL_INFO.format(id=sl_id, token=sl_token)
    info_request = requests.get(info_url).content.decode()

    _m3u_header = '#EXTM3U'
    if not info_request.startswith(_m3u_header):
        return json.loads(info_request)

    # splitlines copes with both LF and CRLF line endings
    info_lines = info_request.splitlines()
    if info_lines[0] != _m3u_header:
        raise RuntimeError('Unexpected header of the presentation info: '
                           f'{info_lines[0]!r}.')

    info_json = {}
    for line in info_lines[1:]:
        if not line.strip():
            continue  # tolerate blank (e.g., trailing) lines
        # only the first colon separates the key; values may contain colons
        key, separator, val = line.partition(':')
        if not separator:
            raise RuntimeError('Unexpected line in the presentation info: '
                               f'{line!r}.')
        # most specific prefixes first
        for pre in ('#EXT-SL-PRESENTATION-', '#EXT-SL-VOD-', '#EXT-SL-'):
            if key.startswith(pre):
                key = key[len(pre):].lower().replace('-', '_')
                break
        if key in info_json:
            raise RuntimeError('Duplicate key in the presentation info: '
                               f'{key}.')
        info_json[key] = val

    return info_json

Pulls video presentation details from SlidesLive.

In [None]:
get_sl_info(my_sl_id)

{'version': '1',
 'account_id': '84503',
 'id': '38956531',
 'title': 'Beyond Static Papers: Rethinking How We Share Scientific Understanding in ML',
 'updated_at': '2023-10-20T15:29:03Z',
 'player_type': 'video_slideshow',
 'start_time': '2494000',
 'thumbnail': 'https://ma.slideslive.com/library/presentations/38956531/thumbnail/beyond-static-papers-rethinking-how-we-share-scientific-understanding-in-ml_ebgwSR_big.jpg',
 'slideslive_logo_visible': 'false',
 'slideslive_logo_linkify': 'false',
 'custom_cmcd_tracking': 'false',
 'playlist_type': 'vod',
 'media_set_id': '119992',
 'video_service_name': 'yoda',
 'video_id': 'oHAAFl-q6gQx',
 'video_ken_enabled': 'true',
 'video_servers': '["1159783934.rsc.cdn77.org","1511376917.rsc.cdn77.org"]',
 'slides_xml_url': 'https://s.slideslive.com/38956531/v1/38956531.xml?1650713664',
 'slides_json_url': 'https://s.slideslive.com/38956531/v1/slides.json?1650713664',
 'subtitles': '[{"name":"English","language":"en","subtitles_id":56748,"webvtt_url

In [None]:
#hide
#test
test_fail(get_sl_info,
          args=['id'],
          contains='Incorrect SlidesLive ID format.')

_sl_info = get_sl_info(_id)
assert isinstance(_sl_info, dict)
assert _sl_info['title'].lower().replace(':', '') == _name.replace('-', ' ')

In [None]:
#export
#hide
def parse_slide_xml(xml, mode='string'):
    """
    Parse the SlidesLive slide XML metadata.

    `mode` can either be `string` or `file`.
    In the `string` mode `xml` holds the XML document itself;
    in the `file` mode `xml` is the path of a file holding the document.

    The document must have a `videoContent` root whose children are
    `slide` elements described with `orderId`, `timeSec`, `time` and
    `slideName` tags.
    Returns a list with one dictionary per slide (in document order)
    mapping each of these tags to its text.

    Raises `ValueError` for an unknown `mode` and `RuntimeError` when the
    document does not follow the structure described above.
    """
    if mode not in ('string', 'file'):
        raise ValueError('The xml parse mode can either be *string* or *file*.')

    slide_properties = ('orderId', 'timeSec', 'time', 'slideName')

    if mode == 'string':
        xml_root = ElementTree.fromstring(xml)
    else:
        # Let ElementTree open the file itself (in binary mode) so that the
        # encoding declared in the XML prolog is honoured instead of the
        # locale's default text encoding.
        xml_root = ElementTree.parse(xml).getroot()
    if xml_root.tag != 'videoContent':
        raise RuntimeError(f'Cannot process this XML structure: {xml_root.tag}.')

    slides = []
    for node in xml_root:
        if node.tag != 'slide':
            raise RuntimeError(f'Unexpected slide type: {node.tag}.')

        slide = {}
        for n in node:
            if n.tag not in slide_properties:
                raise RuntimeError(f'Unexpected slide specifier: {n.tag}.')
            slide[n.tag] = n.text
        slides.append(slide)

    return slides

In [None]:
#hide
# This function processes the XML representation of SlidesLive slides metadata
# and returns it as a list of dictionaries (one per slide).
# This XML structure can either be read from a file (`mode='file'`) or a string
# (`mode='string'`, *default*).

In [None]:
#hide
sl_xml = '''
<videoContent>
  <slide>
    <orderId>1</orderId>
    <timeSec>0</timeSec>
    <time>0</time>
    <slideName>00001</slideName>
  </slide>
  <slide>
    <orderId>2</orderId>
    <timeSec>1382</timeSec>
    <time>1382073</time>
    <slideName>00002</slideName>
  </slide>
</videoContent>
'''

In [None]:
#hide
parse_slide_xml(sl_xml, mode='string')

[{'orderId': '1', 'timeSec': '0', 'time': '0', 'slideName': '00001'},
 {'orderId': '2', 'timeSec': '1382', 'time': '1382073', 'slideName': '00002'}]

In [None]:
#hide
xml_file = tempfile.NamedTemporaryFile(mode='w+b')
xml_file.write(sl_xml.encode())
xml_file.seek(0)

parse_slide_xml(xml_file.name, mode='file')

[{'orderId': '1', 'timeSec': '0', 'time': '0', 'slideName': '00001'},
 {'orderId': '2', 'timeSec': '1382', 'time': '1382073', 'slideName': '00002'}]

In [None]:
#hide
#test
_unexpected_xml_1 = '''
<content>
  <element>
    <orderId>1</orderId>
    <timeSec>0</timeSec>
    <foo>bar</foo>
    <time>0</time>
    <slideName>00001</slideName>
  </element>
</content>
'''
_unexpected_xml_2 = '''
<videoContent>
  <element>
    <orderId>1</orderId>
    <timeSec>0</timeSec>
    <foo>bar</foo>
    <time>0</time>
    <slideName>00001</slideName>
  </element>
</videoContent>
'''
_unexpected_xml_3 = '''
<videoContent>
  <slide>
    <orderId>1</orderId>
    <timeSec>0</timeSec>
    <foo>bar</foo>
    <time>0</time>
    <slideName>00001</slideName>
  </slide>
</videoContent>
'''
test_fail(parse_slide_xml,
          args=[_unexpected_xml_1],
          kwargs=dict(mode='str'),
          contains='The xml parse mode can either be *string* or *file*.')
test_fail(parse_slide_xml,
          args=[_unexpected_xml_1],
          kwargs=dict(mode='string'),
          contains='Cannot process this XML structure: content.')
test_fail(parse_slide_xml,
          args=[_unexpected_xml_2],
          kwargs=dict(mode='string'),
          contains='Unexpected slide type: element.')
test_fail(parse_slide_xml,
          args=[_unexpected_xml_3],
          kwargs=dict(mode='string'),
          contains='Unexpected slide specifier: foo.')

# Parse the same document through both entry points: directly from the
# in-memory string and from the temporary file it was written to.
_xml_string = parse_slide_xml(sl_xml, mode='string')
_xml_file = parse_slide_xml(xml_file.name, mode='file')
assert isinstance(_xml_string, list)
assert isinstance(_xml_string[0], dict)
assert _xml_string[0]['orderId'] == '1'
assert _xml_string[1]['orderId'] == '2'
assert isinstance(_xml_file, list)
assert isinstance(_xml_file[0], dict)
assert _xml_file[0]['orderId'] == '1'
assert _xml_file[1]['orderId'] == '2'
# both modes must agree
assert _xml_string == _xml_file

In [None]:
#hide
xml_file.close()

In [None]:
#export
def get_slide_metadata(sl_meta_url, approach='json'):
    """
    Downloads and decodes the slide metadata of a SlidesLive presentation.

    `sl_meta_url` is the address of the metadata document and `approach`
    -- one of `json` or `xml` -- names the format it is written in.

    JSON documents are returned as decoded; XML documents are converted
    to the same layout, i.e., a dictionary whose `slides` key holds the
    slides ordered by their `orderId`, each described by its `time`,
    `type` (always `image`) and `image` name.

    Raises `ValueError` for an unknown `approach` and `RuntimeError`
    when the metadata request fails.
    """
    if approach not in ('json', 'xml'):
        raise ValueError('The approach can either be *json* or *xml*.')

    response = requests.get(sl_meta_url)
    if not response.ok:
        raise RuntimeError(f'Request failed ({sl_meta_url})')

    payload = response.content.decode()
    if approach == 'json':
        return json.loads(payload)

    assert approach == 'xml'
    # index the slides by their order number so they can be emitted sorted
    slides_by_order = {}
    for entry in parse_slide_xml(payload):
        order = int(entry['orderId'])
        slides_by_order[order] = {'time': int(float(entry['time'])),
                                  'type': 'image',
                                  'image': {'name': entry['slideName']}}

    return {'slides': [slides_by_order[order]
                       for order in sorted(slides_by_order.keys())]}

This function extracts the synchronisation between slide images and video presentation.
This information can either be pulled from XML (`approach='xml'`) or
JSON (`approach='json'`, *default*) format.

In [None]:
slides_structure_json = 'https://cdn.slideslive.com/data/presentations/38956531/v1/slides.json'
slides_metadata = get_slide_metadata(slides_structure_json)

print(slides_metadata.get('slide_qualities'))
print(slides_metadata.get('slides')[0])

['big', 'medium']
{'time': 0, 'type': 'image', 'image': {'name': '00001'}}


In [None]:
#hide
#test
test_fail(get_slide_metadata,
          args=['url'],
          kwargs=dict(approach='foo'),
          contains='The approach can either be *json* or *xml*.')

test_url = 'https://s.slideslive.com/38988636/v4/38988636.xml?1661984709'
test_fail(get_slide_metadata,
          args=[test_url],
          kwargs=dict(approach='xml'),
          contains=f'Request failed ({test_url})')

_meta_json = 'https://cdn.slideslive.com/data/presentations/38956531/v1/slides.json?1624456122'
_meta_json_proc = get_slide_metadata(_meta_json, 'json')
_meta_xml = 'https://cdn.slideslive.com/data/presentations/38956531/v1/38956531.xml?1624456122'
_meta_xml_proc = get_slide_metadata(_meta_xml, 'xml')

assert 'slide_qualities' in _meta_json_proc
assert 'slide_qualities' not in _meta_xml_proc

assert 'slides' in _meta_json_proc
assert 'slides' in _meta_xml_proc

for i, j in zip(_meta_json_proc['slides'], _meta_xml_proc['slides']):
    assert i == j

In [None]:
#export
def get_urls(video_id, slide_meta, slide_type='xlarge', slide_format='png',
             slide=(None, None), time=(None, None)):
    """
    Composes a list of URLs for slides of a given SlidesLive presentation.

    `video_id` specifies the ID of a SlidesLive presentation.
    `slide_meta` is the metadata of a SlidesLive presentation
    as given by the `get_slide_metadata` function.
    `slide_type` specifies the size of the slide
    (one of the keys of `SIZE_MAP`).
    `slide_format` specifies the image format of the slide (`png` or `webp`);
    it is only used for slides whose metadata does not provide
    their own `extname`.

    A subset of slides may be extracted with this function using either
    the `slide` or `time` parameter (but not both simultaneously).

    The `slide` parameter takes a range of slides to be extracted based
    on the slide ID numbers visible in a SlidesLive presentation.
    For example, `slide=(5, 7)` to extract slides 5--7, **inclusive**;
    `slide=(5, None)` to extract from slide 5 **onwards**; or
    `slide=(None, 6)` to extract up to slide 6 **inclusive**.

    The `time` parameter takes a range of time (visible in a SlidesLive
    presentation) for which slides are to be extracted.
    For example, `time=(5, 10)` to extract slides starting at second 5
    (**inclusive**) and ending before second 10 (**exclusive**);
    `time=(5, None)` to extract from second 5 **onwards**; or
    `time=(None, 50)` to extract up to second 50 **exclusive**.

    Raises `TypeError` if `slide` or `time` is not a 2-tuple,
    `RuntimeError` if both bounds are given, and `ValueError` for
    an unknown `slide_type` or `slide_format`.
    """
    if not isinstance(slide, tuple) or len(slide) != 2:
        raise TypeError('Numeric slide bound (slide) must be a 2-tuple.')
    if not isinstance(time, tuple) or len(time) != 2:
        raise TypeError('Time-based slide bound (time) must be a 2-tuple.')

    slide_given = slide[0] is not None or slide[1] is not None
    time_given = time[0] is not None or time[1] is not None
    if slide_given and time_given:
        raise RuntimeError('Both slide and time bounds cannot be used simultaneously.')

    if slide_type not in SIZE_MAP:
        _vals = [f'"{i}"' for i in sorted(list(SIZE_MAP.keys()))]
        raise ValueError(f'The slide type (slide_type={slide_type}) can only '
                         f'be one of the following: {", ".join(_vals)}.')
    slide_size = SIZE_MAP[slide_type]

    if slide_format not in ('png', 'webp'):
        raise ValueError(f'The slide format (slide_format={slide_format}) can either be "png" or "webp".')

    def _slide_url(slide_entry):
        # Build the URL of a single slide; the extension recorded in the
        # metadata (if any) takes precedence over `slide_format`.
        image = slide_entry['image']
        return RS_CDN.format(
            video_id=video_id,
            slide_type=slide_size,
            slide_id=image['name'],
            format=image.get('extname', slide_format).strip('.'))

    all_slides = slide_meta['slides']

    if slide_given:
        lower_bound = -float('inf') if slide[0] is None else slide[0]
        upper_bound = float('inf') if slide[1] is None else slide[1]
        # slide numbers are 1-based and both bounds are inclusive
        return [_slide_url(s)
                for number, s in enumerate(all_slides, start=1)
                if number >= lower_bound and number <= upper_bound]

    if not time_given:
        return [_slide_url(s) for s in all_slides]

    lower_bound = -float('inf') if time[0] is None else time[0]
    upper_bound = float('inf') if time[1] is None else time[1]

    slides = []
    # Every slide but the last one is shown from its own time stamp
    # (inclusive) until the time stamp of the next slide (exclusive).
    for current, following in zip(all_slides, all_slides[1:]):
        t_start = int(current['time'] / 1000)  # inclusive
        t_end = int(following['time'] / 1000)  # exclusive

        fully_inside = t_start >= lower_bound and t_end <= upper_bound
        straddles_lower = (t_start < lower_bound and t_end > lower_bound
                           and t_end < upper_bound)
        straddles_upper = (t_start < upper_bound and t_end > upper_bound
                           and t_start >= lower_bound)
        # NOTE(review): a slide that begins before the lower bound and ends
        # at or after the upper bound matches none of these rules; kept as
        # is so the selection agrees with the rest of the module.
        if fully_inside or straddles_lower or straddles_upper:
            slides.append(_slide_url(current))

    # The end of the last slide is unknown, so only its start is tested.
    # Handling it separately also covers presentations with a single slide
    # (and yields an empty list when there are no slides at all).
    if all_slides:
        last_slide = all_slides[-1]
        t_start = int(last_slide['time'] / 1000)  # inclusive
        if t_start >= lower_bound and t_start < upper_bound:
            slides.append(_slide_url(last_slide))

    return slides

This function generates a list of slide URLs for a given SlidesLive presentation.
You can filter the slides by *time* or *ID* if your talk is in the middle of a session.

In [None]:
my_id_bounds = (1074, 1163)

my_urls = get_urls(my_sl_id, slides_metadata,
                   slide=my_id_bounds)
print(my_urls[0])
print(my_urls[-1])

https://rs.slideslive.com/38956531/slides/01074.png?h=1080&f=png
https://rs.slideslive.com/38956531/slides/01163.png?h=1080&f=png


In [None]:
#hide
#test
_get_urls_meta = {
    'slides': [
        {'time': 0, 'image': {'name': '1'}},
        {'time': 5000, 'image': {'name': '2'}},
        {'time': 10000, 'image': {'name': '3'}},
        {'time': 15000, 'image': {'name': '4'}},
        {'time': 20000, 'image': {'name': '5'}}
    ]
}
_get_urls_meta_1 = {
    'slides': [
        {'type': 'image', 'time': 0, 'image': {'name': '1', 'extname': '.jpg'}},
        {'type': 'image', 'time': 5000, 'image': {'name': '2', 'extname': '.jpg'}},
        {'type': 'image', 'time': 10000, 'image': {'name': '3', 'extname': '.jpg'}},
        {'type': 'image', 'time': 15000, 'image': {'name': '4', 'extname': '.jpg'}},
        {'type': 'image', 'time': 20000, 'image': {'name': '5', 'extname': '.jpg'}}
    ]
}

test_fail(get_urls,
          args=['xxx', _get_urls_meta],
          kwargs=dict(slide_type='xxx', slide=None, time=None),
          contains='Numeric slide bound (slide) must be a 2-tuple.')
test_fail(get_urls,
          args=['xxx', _get_urls_meta],
          kwargs=dict(slide_type='xxx', slide=(0, 6), time=None),
          contains='Time-based slide bound (time) must be a 2-tuple.')
test_fail(get_urls,
          args=['xxx', _get_urls_meta],
          kwargs=dict(slide_type='xxx', slide=(0, 6), time=(10, 15)),
          contains='Both slide and time bounds cannot be used simultaneously.')
test_fail(get_urls,
          args=['xxx', _get_urls_meta],
          kwargs=dict(slide_type='xsmall', slide=(0, 6)),
          contains='The slide type (slide_type=xsmall) can only be one of the '
                   'following: "large", "medium", "small", "xlarge".')
test_fail(get_urls,
          args=['xxx', _get_urls_meta],
          kwargs=dict(slide_type='small', slide=(0, 6), slide_format='abc'),
          contains='The slide format (slide_format=abc) can either be "png" or "webp".')

assert (get_urls('xxx', _get_urls_meta, slide_type='small')
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 6)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide_format='webp')
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='webp')
                for i in range(1, 6)])
assert (get_urls('xxx', _get_urls_meta_1, slide_type='small')
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='jpg')
                for i in range(1, 6)])
assert (get_urls('xxx', _get_urls_meta_1, slide_type='small', slide_format='webp')
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='jpg')
                for i in range(1, 6)])

assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(2, 4))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(2, 5)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(0, 4))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 5)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(-50, 4))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 5)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(-50, 50))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 6)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(1, 5))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 6)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(4, 5))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(4, 6)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', slide=(4, 6))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(4, 6)])

# precise slides
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(0, 5))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=1, format='png')])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(5, 10))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=2, format='png')])
# in-between slides
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(3, 17))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 5)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(5, 17))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(2, 5)])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(3, 20))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=i, format='png')
                for i in range(1, 5)])
# out of range
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(-50, 5))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=1, format='png')])
assert (get_urls('xxx', _get_urls_meta, slide_type='small', time=(20, 1000))
            == [RS_CDN.format(video_id='xxx', slide_type=SIZE_MAP['small'], slide_id=5, format='png')])

In [None]:
#export
def download_slides(url_list, sleep_time=.2, jobs=16,
                    directory=None, technique='python'):
    """
    Downloads files from a list of URLs (`url_list`).

    The destination directory is either `slides` created
    in the current working directory, or a path specified
    via the `directory` parameter.
    Missing directories (including intermediate ones) are created.
    Each file is stored under the last component of its URL path,
    i.e., without the query string.

    Three different download strategies are supported:

    * `technique='python'` -- downloads the images through
      Python's `requests` library one by one, pausing for
      `sleep_time` (`0.2` seconds, by default) after each
      download.
    * `technique='wget'` -- downloads the images by invoking
      `wget` for each image in the list, pausing for
      `sleep_time` (`0.2` seconds, by default) after each
      download.
    * `technique='wget+parallel'` -- downloads multiple images
      simultaneously -- specified by the `jobs` parameter
      (`16`, by default) -- by invoking `wget` through `parallel`.

    With the `python` and `wget` techniques, files that already exist
    are skipped and failed downloads are reported with a warning
    without leaving a (partial) file behind.

    Raises `ValueError` for an unknown `technique` and `RuntimeError`
    if the destination exists but is not a directory.
    """
    if technique not in ('python', 'wget', 'wget+parallel'):
        raise ValueError('The download `technique` should be one of: '
                         'python, wget, wget+parallel.')

    if directory is None:
        slides_dir = os.path.join(os.getcwd(), 'slides')
    else:
        slides_dir = directory

    if os.path.exists(slides_dir):
        if not os.path.isdir(slides_dir):
            raise RuntimeError(
                'The slides destination is a file '
                f'and not a directory.\n({slides_dir})')
    else:
        # unlike os.mkdir, this also works for nested destinations
        os.makedirs(slides_dir)

    if technique in ('python', 'wget'):
        for url in url_list:
            # strip the query string to get the file name
            clean_url = urllib.parse.urljoin(url, urllib.parse.urlparse(url).path)
            fn = os.path.basename(clean_url)
            fn_path = os.path.join(slides_dir, fn)

            if os.path.exists(fn_path):
                if os.path.isfile(fn_path):
                    warnings.warn(f'File {fn_path} already exists; skipping download.')
                else:
                    warnings.warn(f'The file path -- {fn_path} -- is a directory; '
                                  'skipping download.')
            else:
                if technique == 'python':
                    # Fetch before opening the file so that a failed request
                    # does not leave an empty file that blocks later retries.
                    r = requests.get(url)
                    if r.ok:
                        with open(fn_path, 'wb') as f:
                            f.write(r.content)
                    else:
                        warnings.warn(f'Could not download {url} '
                                      f'(HTTP status {r.status_code}); skipping.')
                else:
                    assert technique == 'wget'
                    # Quote the arguments: slide URLs carry `&` in their
                    # query string, which the shell would otherwise treat as
                    # a command separator. `-O` stores the file under the
                    # same name that is checked for above.
                    stream = os.popen(
                        f'wget -O {shlex.quote(fn_path)} {shlex.quote(url)}')
                    print(stream.read())
                    # close() returns None on success, the exit status otherwise
                    if stream.close() is not None:
                        # wget creates the -O target even when the transfer fails
                        if os.path.isfile(fn_path):
                            os.remove(fn_path)
                        warnings.warn(f'wget could not download {url}; skipping.')
                time.sleep(sleep_time)
    else:
        assert technique == 'wget+parallel'
        with tempfile.NamedTemporaryFile(mode='w') as parallel_file:
            parallel_file.write('\n'.join(url_list))
            parallel_file.flush()  # make the list visible to the subprocess

            # NOTE(review): `slides_dir` is interpolated unquoted; paths with
            # spaces or shell metacharacters are not supported here.
            stream = os.popen(f'parallel -j {jobs} wget -P {slides_dir} < {parallel_file.name}')
            print(stream.read())
            stream.close()

        # wget keeps the query string in the file name; strip it
        for f in os.listdir(slides_dir):
            source = os.path.join(slides_dir, f)
            if not os.path.isfile(source):
                continue
            clean_f = urllib.parse.urlparse(f).path
            target = os.path.join(slides_dir, clean_f)
            # the target must be looked up in the slides directory,
            # not in the current working directory
            if clean_f and clean_f != f and not os.path.exists(target):
                os.rename(source, target)

You can use this function to download slides to a selected directory.
If you want to speed up the process, install `wget` and `parallel`,
and use the `technique='wget+parallel'` option.

In [None]:
my_slides = [
    'https://rs.slideslive.com/38956531/slides/00793.png?h=1080&f=png',
    'https://rs.slideslive.com/38956531/slides/00794.png?h=1080&f=png'
]
download_slides(my_slides, directory='my_slides_dir')

In [None]:
ls my_slides_dir

00793.png  00794.png


In [None]:
download_slides(my_slides, directory='my_slides_dir_wget-parallel', technique='wget+parallel')

--2024-02-26 21:12:22--  https://rs.slideslive.com/38956531/slides/00793.png?h=1080&f=png
Resolving rs.slideslive.com (rs.slideslive.com)... 2606:4700:21::8d65:780b, 2606:4700:21::8d65:780a, 141.101.120.11, ...
Connecting to rs.slideslive.com (rs.slideslive.com)|2606:4700:21::8d65:780b|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 299659 (293K) [image/png]
Saving to: ‘my_slides_dir_wget-parallel/00793.png?h=1080&f=png’

     0K .......... .......... .......... .......... .......... 17% 7.69M 0s
    50K .......... .......... .......... .......... .......... 34% 4.26M 0s
   100K .......... .......... .......... .......... .......... 51% 2.90M 0s
   150K .......... .......... .......... .......... .......... 68% 4.02M 0s
   200K .......... .......... .......... .......... .......... 85% 5.58M 0s
   250K .......... .......... .......... .......... ..        100% 4.79M=0.06s

2024-02-26 21:12:23 (4.45 MB/s) - ‘my_slides_dir_wget-parallel/00793.png?h=1080&f=png’ s




--2024-02-26 21:12:22--  https://rs.slideslive.com/38956531/slides/00794.png?h=1080&f=png
Resolving rs.slideslive.com (rs.slideslive.com)... 2606:4700:21::8d65:780b, 2606:4700:21::8d65:780a, 141.101.120.11, ...
Connecting to rs.slideslive.com (rs.slideslive.com)|2606:4700:21::8d65:780b|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 300396 (293K) [image/png]
Saving to: ‘my_slides_dir_wget-parallel/00794.png?h=1080&f=png’

     0K .......... .......... .......... .......... .......... 17%  828K 0s
    50K .......... .......... .......... .......... .......... 34% 1.94M 0s
   100K .......... .......... .......... .......... .......... 51%  108M 0s
   150K .......... .......... .......... .......... .......... 68% 12.1M 0s
   200K .......... .......... .......... .......... .......... 85%  973K 0s
   250K .......... .......... .......... .......... ...       100%  118M=0.1s

2024-02-26 21:12:24 (2.02 MB/s) - ‘my_slides_dir_wget-parallel/00794.png?h=1080&f=png’ sa

In [None]:
ls my_slides_dir_wget-parallel

00793.png  00794.png


In [None]:
#hide
#test
CWD = os.getcwd()

In [None]:
#hide
#test
_dl_slides = [
    'https://rs.slideslive.com/38956531/slides/00800.png?h=1080&f=png',
    'https://rs.slideslive.com/38956531/slides/00801.png?h=1080&f=png'
]
_dl_files = [os.path.basename(urllib.parse.urljoin(i, urllib.parse.urlparse(i).path))
             for i in _dl_slides]

test_fail(download_slides,
          args=[_dl_slides],
          kwargs=dict(technique='xxx'),
          contains='The download `technique` should be one of: '
                   'python, wget, wget+parallel.')
with tempfile.NamedTemporaryFile() as tf:
    test_fail(download_slides,
              args=[_dl_slides],
              kwargs=dict(technique='wget+parallel', directory=tf.name),
              contains='The slides destination is a file '
                       f'and not a directory.\n({tf.name})')
# creating dirs
with tempfile.TemporaryDirectory() as tempdir:
    with _cd_temp(tempdir):
        assert os.getcwd().endswith(tempdir)
        assert os.path.exists(tempdir) and os.path.isdir(tempdir)

        # dirs
        ## default dir
        download_slides(_dl_slides)
        assert os.path.exists(tempdir) and os.path.isdir(tempdir)
        temp_slides_dir = os.path.join(tempdir, 'slides')
        assert os.path.exists(temp_slides_dir) and os.path.isdir(temp_slides_dir)
        for i in _dl_files:
            i_f = os.path.join(tempdir, 'slides', i)
            assert os.path.exists(i_f) and os.path.isfile(i_f)
        ## named dir
        download_slides(_dl_slides, directory='foo')
        temp_slides_dir = os.path.join(tempdir, 'foo')
        assert os.path.exists(temp_slides_dir) and os.path.isdir(temp_slides_dir)
        for i in _dl_files:
            i_f = os.path.join(tempdir, 'foo', i)
            assert os.path.exists(i_f) and os.path.isfile(i_f)

        # warn
        test_warns(lambda: download_slides(_dl_slides, directory='foo'))

    assert os.getcwd() == CWD

In [None]:
#export
def ffmpeg_concat_script(slide_meta, slide_folder=None, last_duration=None,
                         slide=(None, None), time=(None, None)):
    """
    Builds an ffmpeg frame concatenation string from slide metadata.
    Since the duration of the very last slide cannot be inferred,
    it lasts for a user-specified amount of time
    (`last_duration`, `5` by default).

    `slide_meta` is a dictionary whose `slides` entry lists the slides in
    presentation order; each slide provides its start `time` (in
    milliseconds) and its image file name (without extension) under
    `image` -> `name`.

    `slide_folder` specifies the location of the slide images.
    By default, it is the `slides` folder in the current
    working directory.

    A subset of slides may be extracted with this function using either
    the `slide` or `time` parameter (but not both simultaneously).

    The `slide` parameter takes a range of slides to be extracted based
    on the slide ID numbers visible in a SlidesLive presentation.
    For example, `slide=(5, 7)` to extract slides 5--7, **inclusive**;
    `slide=(5, None)` to extract from slide 5 **onwards**; or
    `slide=(None, 6)` to extract up to slide 6 **inclusive**.

    The `time` parameter takes a range of time (visible in a SlidesLive
    presentation) for which slides are to be extracted.
    For example, `time=(5, 10)` to extract slides starting at second 5
    (**inclusive**) and ending before second 10 (**exclusive**);
    `time=(5, None)` to extract from second 5 **onwards**; or
    `time=(None, 50)` to extract up to second 50 **exclusive**.
    A slide that is only partly covered by the time range is trimmed to it,
    including the case where the whole range falls inside a single slide.

    Returns a 3-tuple: the ffmpeg concat script (a string) followed by the
    start and the end of the extracted segment in seconds.
    When no slide is selected the script is empty and both times are `None`.

    Raises `TypeError` if `slide` or `time` is not a 2-tuple,
    `RuntimeError` if both bounds are used at once or a slide image is
    missing, and `ValueError` if the slide folder does not exist.
    """
    def _slide_exists(_slide_file):
        # resolve the slide image to an absolute path and make sure it is there
        _f = os.path.join(slide_folder, f"{_slide_file}.png")
        _f = os.path.abspath(_f)
        if not os.path.exists(_f) or not os.path.isfile(_f):
            raise RuntimeError(f'{_f} file does not exist.')
        return _f

    if not isinstance(slide, tuple) or len(slide) != 2:
        raise TypeError('Numeric slide bound (slide) must be a 2-tuple.')
    if not isinstance(time, tuple) or len(time) != 2:
        raise TypeError('Time-based slide bound (time) must be a 2-tuple.')

    slide_given = slide[0] is not None or slide[1] is not None
    time_given = time[0] is not None or time[1] is not None
    if slide_given and time_given:
        raise RuntimeError('Both slide and time bounds cannot be used simultaneously.')

    if slide_folder is None:
        slide_folder = os.path.join(os.getcwd(), 'slides')
    if not os.path.exists(slide_folder) or not os.path.isdir(slide_folder):
        raise ValueError(f'Given directory does not exist: {slide_folder}.')

    slides = slide_meta['slides']
    # Index of the final slide (-1 when there are no slides). Using it instead
    # of a leftover loop variable keeps single-slide presentations working.
    last = len(slides) - 1

    ffmpeg = []  # alternating "file '<path>'" / "duration <seconds>" lines
    spans = []   # (start, end) in seconds of every emitted slide, in order

    def _emit(_index, _duration, _start, _end):
        # append one slide to the script and record the time span it covers
        _f = _slide_exists(slides[_index]['image']['name'])
        ffmpeg.extend([f"file '{_f}'", f'duration {_duration:.3f}'])
        spans.append((_start, _end))

    if time_given:
        lower_bound = -float('inf') if time[0] is None else time[0]
        upper_bound = float('inf') if time[1] is None else time[1]
        for i in range(last):
            start_ms = slides[i]['time']
            end_ms = slides[i + 1]['time']
            # the bounds are compared against whole seconds
            t_start = int(start_ms / 1000)  # inclusive
            t_end = int(end_ms / 1000)  # exclusive

            starts_inside = t_start >= lower_bound
            ends_inside = t_end <= upper_bound
            if starts_inside and ends_inside:
                pass  # the slide lies entirely within the range
            elif ends_inside:
                # the range begins part-way through this slide
                if t_end <= lower_bound:
                    continue
                start_ms = lower_bound * 1000
            elif starts_inside:
                # the range ends part-way through this slide
                if t_start >= upper_bound:
                    continue
                end_ms = upper_bound * 1000
            else:
                # the whole range falls inside this one slide
                if lower_bound >= upper_bound:
                    continue
                start_ms = lower_bound * 1000
                end_ms = upper_bound * 1000

            _emit(i, (end_ms - start_ms) / 1000, start_ms / 1000, end_ms / 1000)

        # The last slide has no successor, so its length comes from
        # `last_duration` and/or the upper bound.
        # NOTE: a range starting after the last slide's own start time selects
        # nothing, since the real duration of that slide is unknown.
        if last >= 0:
            t_start_ = slides[last]['time'] / 1000
            t_start = int(t_start_)  # inclusive
            if lower_bound <= t_start < upper_bound:
                if upper_bound == float('inf'):
                    duration = 5 if last_duration is None else last_duration
                elif last_duration is None or t_start + last_duration >= upper_bound:
                    duration = upper_bound - t_start
                else:
                    duration = last_duration
                _emit(last, duration, t_start_, t_start_ + duration)
    else:
        # Slide-number bounds (1-based, inclusive); with no bound at all the
        # limits are infinite and every slide is selected.
        lower_bound = -float('inf') if slide[0] is None else slide[0]
        upper_bound = float('inf') if slide[1] is None else slide[1]
        for i in range(last):
            if lower_bound <= i + 1 <= upper_bound:
                t_start = slides[i]['time']
                t_end = slides[i + 1]['time']
                _emit(i, (t_end - t_start) / 1000, t_start / 1000, t_end / 1000)
        if last >= 0 and lower_bound <= last + 1 <= upper_bound:
            last_duration = 5 if last_duration is None else last_duration
            t_start = slides[last]['time'] / 1000
            _emit(last, last_duration, t_start, t_start + last_duration)

    # NOTE: the last image must be duplicated without duration due to a bug
    #       in ffmpeg (https://trac.ffmpeg.org/wiki/Slideshow)
    if len(ffmpeg) > 1:
        ffmpeg.append(ffmpeg[-2])

    glob_start = spans[0][0] if spans else None
    glob_end = spans[-1][1] if spans else None
    return '\n'.join(ffmpeg), glob_start, glob_end

This function creates a script for concatenating images into a video.

> It can be executed with the `ffmpeg -safe 0 -f concat -i ffmpeg_concat_script.txt -vsync vfr slides.mp4`
> command.
> See the `compose_ffmpeg_video` function for more details.

In [None]:
# Build the concat script for slides 793--794 (both inclusive) whose images
# live in `my_slides_dir`; `slides_metadata` comes from an earlier cell.
my_slides_ffmpeg, start_second, end_second = ffmpeg_concat_script(
    slides_metadata,
    slide_folder='my_slides_dir',
    slide=(793, 794))

print(my_slides_ffmpeg)
# start and end (in seconds) of the extracted segment
print(f'\n{start_second}--{end_second}')

file '/Users/kacper/Desktop/myslideslive/src/my_slides_dir/00793.png'
duration 29.023
file '/Users/kacper/Desktop/myslideslive/src/my_slides_dir/00794.png'
duration 1.994
file '/Users/kacper/Desktop/myslideslive/src/my_slides_dir/00794.png'

10611.802--10642.819


In [None]:
#hide
#test
_sl_files = [f"{i['image']['name']}.png"
             for i in _get_urls_meta['slides']]
_sl_dur = [5, 5, 5, 5, 5]


def _check_concat(span, shown, durations, **kwargs):
    """
    Runs `ffmpeg_concat_script` on the test metadata with `kwargs` and checks:

    * the returned (start, end) pair equals `span`;
    * the script lists exactly the slides whose 0-based indices are in
      `shown`, in order, each followed by `durations[index]`; and
    * the last image is repeated once more at the very end.
    """
    _script, _start, _end = ffmpeg_concat_script(_get_urls_meta, **kwargs)
    assert (_start, _end) == span, (kwargs, _start, _end)

    _lines = _script.split('\n')
    # one file/duration pair per slide plus the trailing duplicate image
    assert len(_lines) == 2 * len(shown) + 1, (kwargs, _lines)
    assert _lines[-1] == _lines[-3]
    for _row, _idx in enumerate(shown):
        assert _lines[2 * _row].endswith(f"{_sl_files[_idx]}'")
        assert _lines[2 * _row + 1] == f'duration {durations[_idx]:.3f}'


# malformed bounds and a missing slide folder must be rejected
test_fail(ffmpeg_concat_script,
          args=[_get_urls_meta],
          kwargs=dict(slide=None, time=None),
          contains='Numeric slide bound (slide) must be a 2-tuple.')
test_fail(ffmpeg_concat_script,
          args=[_get_urls_meta],
          kwargs=dict(slide=(0, 6), time=None),
          contains='Time-based slide bound (time) must be a 2-tuple.')
test_fail(ffmpeg_concat_script,
          args=[_get_urls_meta],
          kwargs=dict(slide=(0, 6), time=(10, 15)),
          contains='Both slide and time bounds cannot be used simultaneously.')
test_fail(ffmpeg_concat_script,
          args=[_get_urls_meta],
          kwargs=dict(slide_folder='test_xyz'),
          contains='Given directory does not exist: test_xyz.')

with tempfile.TemporaryDirectory() as tempdir:
    with _cd_temp(tempdir):
        assert os.getcwd().endswith(tempdir)
        assert os.path.exists(tempdir) and os.path.isdir(tempdir)
        # empty placeholder images in the default `slides` folder
        os.mkdir('slides')
        for i in _sl_files:
            _sl_path = os.path.join(tempdir, 'slides', i)
            with open(_sl_path, 'w') as f:
                f.write('')
            assert os.path.isfile(_sl_path)

        # no bound
        _check_concat((0, 25), range(5), _sl_dur)

        # item bound
        _check_concat((5, 20), range(1, 4), _sl_dur, slide=(2, 4))
        _check_concat((0, 20), range(4), _sl_dur, slide=(0, 4))
        _check_concat((0, 20), range(4), _sl_dur, slide=(-50, 4))
        _check_concat((0, 25), range(5), _sl_dur, slide=(-50, 50))
        _check_concat((0, 25), range(5), _sl_dur, slide=(1, 5))
        _check_concat((15, 25), range(3, 5), _sl_dur, slide=(4, 5))
        _check_concat((15, 25), range(3, 5), _sl_dur, slide=(4, 6))

        # time bound
        ## precise slides
        _check_concat((0, 5), range(1), _sl_dur, time=(0, 5))
        _check_concat((5, 10), range(1, 2), _sl_dur, time=(5, 10))
        ## in-between slides
        _check_concat((3, 17), range(4), [2, 5, 5, 2, None], time=(3, 17))
        _check_concat((5, 17), range(1, 4), [None, 5, 5, 2, None], time=(5, 17))
        _check_concat((3, 20), range(4), [2, 5, 5, 5, None], time=(3, 20))
        _check_concat((3, 22), range(5), [2, 5, 5, 5, 2], time=(3, 22))
        ## out of range
        _check_concat((3, 25), range(5), [2, 5, 5, 5, 5], time=(3, 25))
        _check_concat((3, 30), range(5), [2, 5, 5, 5, 10], time=(3, 30))
        ### last_duration
        _check_concat((3, 20), range(4), [2, 5, 5, 5, None],
                      time=(3, 20), last_duration=20)
        _check_concat((3, 22), range(5), [2, 5, 5, 5, 2],
                      time=(3, 22), last_duration=20)
        ## out of range
        _check_concat((3, 23), range(5), [2, 5, 5, 5, 3],
                      time=(3, 25), last_duration=3)
        _check_concat((3, 30), range(5), [2, 5, 5, 5, 10],
                      time=(3, 30), last_duration=20)
        # all five slides are verified here, including the 20 s final slide
        _check_concat((3, 40), range(5), [2, 5, 5, 5, 20],
                      time=(3, None), last_duration=20)
        ###
        _check_concat((0, 5), range(1), [5, None, None, None, None],
                      time=(-50, 5))
        _check_concat((20, 1000), range(4, 5), [None, None, None, None, 980],
                      time=(20, 1000))

In [None]:
#export
def compose_ffmpeg_video(ffmpeg_script, video_file=None):
    """
    Builds video slides from an ffmpeg script using the
    `ffmpeg -safe 0 -f concat -i ffmpeg_concat.txt -vsync vfr slides.mp4` command.

    `ffmpeg_script` is the text produced by `ffmpeg_concat_script`.
    `video_file` is the output path (`slides.mp4` by default); the `.mp4`
    extension is appended when it is missing.
    A script holding a single frame (3 lines) is rendered by looping that
    image for its duration instead of going through the concat demuxer.

    ffmpeg is started without a shell, so paths containing spaces or shell
    metacharacters are passed to it verbatim.
    Whatever ffmpeg writes to its standard output is printed.
    A warning is issued when ffmpeg cannot be started or exits with a
    non-zero status.

    Raises `RuntimeError` if the output file already exists.
    """
    import subprocess  # local import: not part of the module-level imports

    if video_file is None:
        video_file = 'slides.mp4'
    if not video_file.endswith('.mp4'):
        video_file += '.mp4'
    if os.path.exists(video_file):
        raise RuntimeError(f'{video_file} video file already exists.')

    def _run_ffmpeg(_args):
        # run ffmpeg with an argument list (no shell quoting involved)
        try:
            _done = subprocess.run(['ffmpeg'] + _args,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        except FileNotFoundError:
            warnings.warn('The ffmpeg executable could not be found; '
                          f'{video_file} was not created.')
            return
        print(_done.stdout)
        if _done.returncode != 0:
            warnings.warn(f'ffmpeg exited with status {_done.returncode}; '
                          f'{video_file} may be missing or incomplete.')

    ffmpeg_script_list = ffmpeg_script.split('\n')
    assert len(ffmpeg_script_list) > 2, '3 elements constitute a single frame'
    if len(ffmpeg_script_list) == 3:
        # single frame: file line, duration line and the duplicated file line
        img = ffmpeg_script_list[0]
        assert img.startswith("file '") and img.endswith("'")
        img = img[6:-1]  # strip the leading "file '" and the closing quote

        duration = ffmpeg_script_list[1]
        assert duration.startswith('duration ')
        duration = duration[9:]  # strip the leading "duration "

        # -c:v libx264
        _run_ffmpeg(['-loop', '1', '-i', img, '-t', duration, video_file])
    else:
        with tempfile.NamedTemporaryFile(mode='w') as tf:
            tf.write(ffmpeg_script)
            # make sure the script is on disk before ffmpeg opens it by name
            tf.flush()

            # -pix_fmt yuv420p
            _run_ffmpeg(['-safe', '0', '-f', 'concat', '-i', tf.name,
                         '-vsync', 'vfr', video_file])

Alternatively, you can save the ffmpeg script to a file yourself and
run the `ffmpeg` command quoted in the docstring above from your terminal.

In [None]:
# No `video_file` is given, so the video is written to `slides.mp4`
# in the current working directory.
compose_ffmpeg_video(my_slides_ffmpeg)




ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopen

In [None]:
ls -la slides.mp4

-rw-r--r--@ 1 kacper  staff  274107 26 Feb 21:12 slides.mp4


In [None]:
#hide
#test
with tempfile.TemporaryDirectory() as tempdir:
    with _cd_temp(tempdir):
        # sanity check: we are now working inside the temporary directory
        assert os.getcwd().endswith(tempdir)
        assert os.path.exists(tempdir) and os.path.isdir(tempdir)

        # create an empty file where the default output video would go
        _sl_path = os.path.join(tempdir, 'slides.mp4')
        with open(_sl_path, 'w') as f:
            f.write('')
        assert os.path.isfile(_sl_path)

        # the existing `slides.mp4` must be refused; this check runs before
        # the (empty) script is inspected, so ffmpeg is never invoked
        test_fail(compose_ffmpeg_video,
                  args=[''],
                  kwargs=dict(video_file=None),
                  contains='slides.mp4 video file already exists.')

In [None]:
#export
class SlidesLive():
    """
    Simplifies SlidesLive interaction.

    Should be initialised with SlidesLive presentation URL (`video_url`).
    Optionally, a destination folder for downloading slides may be specified
    (`slides_folder`).

    The slide folder, the video file name and the `slide`/`time` bounds are
    remembered between method calls: a value passed to one method is reused
    by later calls that do not specify it.

    See `url2id`, `get_sl_info` and `get_slide_metadata` for more details.
    """
    def __init__(self, video_url, slides_folder=None):
        """Initialises SlidesLive."""
        self.slides_dir = slides_folder
        self.slides_video = None
        self.slide = None
        self.time = None

        self.start_time = None
        self.end_time = None

        self.video_id, self.video_name = url2id(video_url)
        self.video_description = get_sl_info(self.video_id)

        # XML metadata is preferred; JSON is the fallback when the XML URL is
        # not advertised or its request fails.
        meta, retrieved = None, False
        for approach in ('xml', 'json'):
            key = f'slides_{approach}_url'
            if key not in self.video_description:
                continue
            url = self.video_description[key]
            try:
                meta = get_slide_metadata(url, approach=approach)
            except RuntimeError as e:
                # only a failed request justifies trying the next format
                if str(e) != f'Request failed ({url})':
                    raise
                continue
            retrieved = True
            break

        if not retrieved:
            raise RuntimeError('Failed to retrieve XML or JSON slides metadata')

        self.video_metadata = meta

    def _use_folder(self, folder):
        """Remembers `folder` as the slide folder; defaults to the video id."""
        if folder is not None:
            self.slides_dir = folder
        elif self.slides_dir is None:
            self.slides_dir = self.video_id

    def _remember_bounds(self, slide, time):
        """Stores explicit bounds; keeps earlier ones when none are given."""
        if slide is not None:
            self.slide = slide
        elif self.slide is None:
            self.slide = (None, None)

        if time is not None:
            self.time = time
        elif self.time is None:
            self.time = (None, None)

    def get_slide_urls(self, slide_type='xlarge', slide=None, time=None):
        """Returns a list of slide URLs -- see `get_urls` for more details."""
        self._remember_bounds(slide, time)
        return get_urls(self.video_id, self.video_metadata,
                        slide_type=slide_type,
                        slide=self.slide, time=self.time)

    def download_slides(self, slide_type='xlarge', slide=None, time=None,
                        sleep_time=.2, jobs=16, directory=None, technique='python'):
        """Downloads a collection of slides -- see `get_urls` and `download_slides` for more details."""
        self._use_folder(directory)
        urls = self.get_slide_urls(slide_type=slide_type, slide=slide, time=time)
        # the bare name refers to the module-level function, not this method
        download_slides(urls, sleep_time=sleep_time, jobs=jobs,
                        directory=self.slides_dir, technique=technique)

    def get_ffmpeg_script(self, slide_folder=None, last_duration=None,
                          slide=None, time=None):
        """Composes ffmpeg script -- see `ffmpeg_concat_script` for more details."""
        self._use_folder(slide_folder)
        self._remember_bounds(slide, time)
        return ffmpeg_concat_script(self.video_metadata, slide_folder=self.slides_dir,
                                    last_duration=last_duration, slide=self.slide, time=self.time)

    def compose_video(self, video_file=None,
                      slide_folder=None, last_duration=None,
                      slide=None, time=None):
        """Builds slides video -- see `ffmpeg_concat_script` and `compose_ffmpeg_video` for more details."""
        # The default video name is derived from the slide folder known
        # *before* `slide_folder` is applied (or from the video id).
        if video_file is not None:
            self.slides_video = video_file
        elif self.slides_video is None:
            stem = self.video_id if self.slides_dir is None else self.slides_dir
            self.slides_video = f'{stem}.mp4'

        self._use_folder(slide_folder)

        script, self.start_time, self.end_time = self.get_ffmpeg_script(
            slide_folder=self.slides_dir, last_duration=last_duration,
            slide=slide, time=time)
        compose_ffmpeg_video(script, video_file=self.slides_video)

        print(f'\n\nExtracted time segment in seconds:\n    {self.start_time}--{self.end_time}')

In [None]:
show_doc(SlidesLive.get_slide_urls)

<h4 id="SlidesLive.get_slide_urls" class="doc_header"><code>SlidesLive.get_slide_urls</code><a href="__main__.py#L55" class="source_link" style="float:right">[source]</a></h4>

> <code>SlidesLive.get_slide_urls</code>(**`slide_type`**=*`'xlarge'`*, **`slide`**=*`None`*, **`time`**=*`None`*)

Returns a list of slide URLs -- see [`get_urls`](/myslideslive/slideslive.html#get_urls) for more details.

In [None]:
show_doc(SlidesLive.download_slides)

<h4 id="SlidesLive.download_slides" class="doc_header"><code>SlidesLive.download_slides</code><a href="__main__.py#L79" class="source_link" style="float:right">[source]</a></h4>

> <code>SlidesLive.download_slides</code>(**`slide_type`**=*`'xlarge'`*, **`slide`**=*`None`*, **`time`**=*`None`*, **`sleep_time`**=*`0.2`*, **`jobs`**=*`16`*, **`directory`**=*`None`*, **`technique`**=*`'python'`*)

Downloads a collection of slides -- see [`get_urls`](/myslideslive/slideslive.html#get_urls) and `download_slide` for more details.

In [None]:
show_doc(SlidesLive.get_ffmpeg_script)

<h4 id="SlidesLive.get_ffmpeg_script" class="doc_header"><code>SlidesLive.get_ffmpeg_script</code><a href="__main__.py#L92" class="source_link" style="float:right">[source]</a></h4>

> <code>SlidesLive.get_ffmpeg_script</code>(**`slide_folder`**=*`None`*, **`last_duration`**=*`None`*, **`slide`**=*`None`*, **`time`**=*`None`*)

Composes ffmpeg script -- see [`ffmpeg_concat_script`](/myslideslive/slideslive.html#ffmpeg_concat_script) for more details.

In [None]:
show_doc(SlidesLive.compose_video)

<h4 id="SlidesLive.compose_video" class="doc_header"><code>SlidesLive.compose_video</code><a href="__main__.py#L121" class="source_link" style="float:right">[source]</a></h4>

> <code>SlidesLive.compose_video</code>(**`video_file`**=*`None`*, **`slide_folder`**=*`None`*, **`last_duration`**=*`None`*, **`slide`**=*`None`*, **`time`**=*`None`*)

Builds slides video -- see [`ffmpeg_concat_script`](/myslideslive/slideslive.html#ffmpeg_concat_script) and [`compose_ffmpeg_video`](/myslideslive/slideslive.html#compose_ffmpeg_video) for more details.

The `SlidesLive` class helps to navigate through the `myslideslive.slideslive` module functions.

In [None]:
# Construction resolves the video id from the URL and loads the slide
# metadata (XML first, JSON as a fallback); `slides_folder` becomes the
# default folder for downloaded slides.
msl = SlidesLive('https://slideslive.com/38956531/'
                 'beyond-static-papers-'
                 'rethinking-how-we-share-scientific-understanding-in-ml',
                 slides_folder='beyond-static-papers')

In [None]:
# Download slides 1074--1075 into `msl.slides_dir`; the slide range is
# remembered on `msl` for later calls.
msl.download_slides(slide=(1074, 1075))

In [None]:
ls {msl.slides_dir}

01074.png  01075.png


In [None]:
# No arguments: reuses the slide range remembered from `download_slides`
# and writes the video to `<msl.slides_dir>.mp4`.
msl.compose_video()




Extracted time segment in seconds:
    15215.247--15250.244


ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopen

In [None]:
ls -la {msl.slides_video}

-rw-r--r--@ 1 kacper  staff  107350 26 Feb 21:49 beyond-static-papers.mp4


In [None]:
#hide
#test
# TODO: add tests for the `SlidesLive` usage shown above

In [None]:
# Second example talk, again identified by its SlidesLive URL.
msl1 = SlidesLive(
    'https://slideslive.com/38988636/24-sparsity-on-gpu-tensor-cores',
    slides_folder='sparsity-on-gpu-tensor-cores',
)

In [None]:
# Request slides 30 to 31 of the second talk.
# NOTE(review): the recorded directory listing for this talk shows six
# hash-named PNG files rather than two numbered ones -- presumably this talk
# uses a different slide naming scheme; confirm.
msl1.download_slides(slide=(30, 31))



In [None]:
ls {msl1.slides_dir}

0y6lL6DLzQDW61hc.png  Al9BWavE9eBg6Cbt.png  RKfA7o8KZcOoOIN2.png
2afSaynhm8Bny1UR.png  NatAtRXM3yJfYDZE.png  yju18VFM0VQbavac.png


In [None]:
# Build the slides video for the second talk with all-default arguments;
# the log printed underneath is emitted by ffmpeg.
msl1.compose_video()

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopen




Extracted time segment in seconds:
    1253.12--1417.0


[out#0/mp4 @ 0x13483b140] video:405kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.243480%
frame=    7 fps=0.0 q=-1.0 Lsize=     406kB time=00:02:28.80 bitrate=  22.4kbits/s speed= 744x    
[libx264 @ 0x13483ff70] frame I:1     Avg QP: 9.12  size:162985
[libx264 @ 0x13483ff70] frame P:2     Avg QP:11.14  size: 43707
[libx264 @ 0x13483ff70] frame B:4     Avg QP:13.62  size: 40932
[libx264 @ 0x13483ff70] consecutive B-frames: 14.3% 28.6%  0.0% 57.1%
[libx264 @ 0x13483ff70] mb I  I16..4: 50.4% 22.3% 27.3%
[libx264 @ 0x13483ff70] mb P  I16..4: 11.0%  7.3%  6.5%  P16..4:  6.4%  1.3%  0.9%  0.0%  0.0%    skip:66.5%
[libx264 @ 0x13483ff70] mb B  I16..4:  1.9%  0.7%  5.2%  B16..8: 11.9%  1.3%  0.5%  direct: 2.4%  skip:76.1%  L0:35.1% L1:62.5% BI: 2.4%
[libx264 @ 0x13483ff70] 8x8 transform intra:22.0% inter:47.4%
[libx264 @ 0x13483ff70] coded y,u,v intra: 30.1% 20.9% 19.4% inter: 4.7% 1.9% 1.4%
[libx264 @ 0x13483ff70] i16 v,h,dc,p: 72% 24%  4%  0%
[libx264 @ 0x

In [None]:
ls -la {msl1.slides_video}

-rw-r--r--@ 1 kacper  staff  415828 26 Feb 21:53 sparsity-on-gpu-tensor-cores.mp4


In [None]:
#hide
#test
# TODO: add tests for the second `SlidesLive` usage example shown above

In [None]:
#hide
# Run nbdev's notebook-to-module conversion (writes the library code
# from the cells flagged for export).
notebook2script()