Skip to content

Commit

Permalink
[Instagram] Add story/highlight extractor (yt-dlp#2006)
Browse files Browse the repository at this point in the history
Fixes ytdl-org/youtube-dl#25575
Authored by: u-spec-png
  • Loading branch information
u-spec-png committed Dec 28, 2021
1 parent e540c56 commit dd5e60b
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
1 change: 1 addition & 0 deletions yt_dlp/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@
InstagramIOSIE,
InstagramUserIE,
InstagramTagIE,
InstagramStoryIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
Expand Down
74 changes: 74 additions & 0 deletions yt_dlp/extractor/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,77 @@ def _query_vars_for(data):
'tag_name':
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
}


class InstagramStoryIE(InstagramBaseIE):
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
IE_NAME = 'instagram:story'

_TESTS = [{
'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
'info_dict': {
'id': '18090946048123978',
'title': 'Rare',
},
'playlist_mincount': 50
}]

def _real_extract(self, url):
username, story_id = self._match_valid_url(url).groups()

story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
'X-IG-App-ID': 936619743392459,
'X-ASBD-ID': 198387,
'X-IG-WWW-Claim': 0,
'X-Requested-With': 'XMLHttpRequest',
'Referer': url,
})
user_id = story_info['user']['id']
highlight_title = traverse_obj(story_info, ('highlight', 'title'))

story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
'X-IG-App-ID': 936619743392459,
'X-ASBD-ID': 198387,
'X-IG-WWW-Claim': 0,
})['reels']
entites = []

videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
for video_info in videos:
formats = []
if isinstance(video_info, list):
video_info = video_info[0]
vcodec = video_info.get('video_codec')
dash_manifest_raw = video_info.get('video_dash_manifest')
videos_list = video_info.get('video_versions')
if not (dash_manifest_raw or videos_list):
continue
for format in videos_list:
formats.append({
'url': format.get('url'),
'width': format.get('width'),
'height': format.get('height'),
'vcodec': vcodec,
})
if dash_manifest_raw:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
self._sort_formats(formats)
thumbnails = [{
'url': thumbnail.get('url'),
'width': thumbnail.get('width'),
'height': thumbnail.get('height')
} for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
entites.append({
'id': video_info.get('id'),
'title': f'Story by {username}',
'timestamp': int_or_none(video_info.get('taken_at')),
'uploader': traverse_obj(videos, ('user', 'full_name')),
'duration': float_or_none(video_info.get('video_duration')),
'uploader_id': user_id,
'thumbnails': thumbnails,
'formats': formats,
})

return self.playlist_result(entites, playlist_id=story_id, playlist_title=highlight_title)

0 comments on commit dd5e60b

Please sign in to comment.