In [None]:
import scrapetube
import re

from util.custom_values import CHANNEL_ID
from util.log_videos import get_videos


def extract_channel_name_from_label(label):
    """
    Return the channel name embedded in a video's accessibility label.

    Labels look like the following:
    '<title> by ObviouslyASMR 4 days ago 30 minutes 23,457 views' or
    'New ASMR 4 days ago 1 hour, 30 minutes 457 views' (also '2,619,438 views',
    '1 view', ...), optionally followed by ' - play Short'.

    Args:
        label: The accessibility label string.

    Returns:
        The text that follows the last ' by ' separator, with the Shorts
        marker and the trailing '<n> <unit> ago <duration> <views>' part
        removed. If that trailing part is not recognised, the text is
        returned without it being stripped.

    Note:
        A channel name that itself contains ' by ' is still ambiguous and is
        cut at its last ' by ', because the label does not delimit the title.
    """
    # Split on ' by ' as a whole word. Splitting on 'by ' (no leading space)
    # also matched inside words, so "Baby ASMR" was truncated to "ASMR".
    tail = label.rsplit(' by ', 1)[-1].replace(' - play Short', '')
    # Keep in mind the channel name might contain spaces, so strip from the
    # "<int> <hours/days/months/years> ago" part onwards instead of splitting.
    #  - (\d+\s\w+,\s)* accepts any number of duration components
    #    ("1 hour, 2 minutes, 3 seconds"), not just one optional prefix
    #  - views? also accepts the singular "1 view"
    regex = r"\s(\d+\s\w+\sago)\s((\d+\s\w+,\s)*\d+\s\w+)\s((\d+,)*\d+\sviews?)"
    return re.sub(regex, "", tail)

def parse_views_from_text(simple_text):
    """
    Return the view count as an integer from text like '23,457 views'.

    Also accepts the singular form '1 view' and the text 'No views'
    (returned as 0), both of which previously raised ValueError.

    Args:
        simple_text: The view-count text, e.g. '2,619,438 views'.

    Returns:
        The number of views as an int.

    Raises:
        ValueError: If no integer can be parsed from the text.
    """
    text = simple_text.strip()
    # Zero-view videos carry words instead of a number.
    if text.lower() == 'no views':
        return 0
    # Strip the trailing "view"/"views" word, then the thousands separators.
    digits = re.sub(r"\s*views?$", "", text).replace(',', '')
    return int(digits)

def extract_video_dict_from_scrapetube(st_dict):
    """Convert a scrapetube video dict into a flat video dict.

    The returned dict has these keys:
    {
    id: the 'videoId' value,
    title: text of the first title run,
    thumb_url: url of the first thumbnail listed,
    views: view count as an int (parsed from 'viewCountText'),
    duration: the 'lengthText' simple text, e.g. '30:56',
    channel_name: channel name parsed from the title's accessibility label,
    }
    Neither an absolute publish date nor a channel id is available in the
    scrapetube dict ('publishedTimeText' only says e.g. "4 days ago"), so
    those fields are not included.

    Raises KeyError/IndexError if an expected field is missing from st_dict.
    """
    title_info = st_dict['title']
    # The accessibility label is the only place the channel name shows up.
    label = title_info['accessibility']['accessibilityData']['label']
    # Dict entries are evaluated top to bottom, in the same order the fields are listed.
    return {
        'id': st_dict['videoId'],
        'title': title_info['runs'][0]['text'],
        'thumb_url': st_dict['thumbnail']['thumbnails'][0]['url'],
        'views': parse_views_from_text(st_dict['viewCountText']['simpleText']),
        'duration': st_dict['lengthText']['simpleText'],
        'channel_name': extract_channel_name_from_label(label),
    }


In [None]:
# Load the videos that were logged earlier.
# NOTE(review): the [:-2] slice drops the last two logged entries — presumably
# to simulate two unlogged videos while testing; confirm before relying on it.
logged_videos = get_videos(False)[:-2]
# logged_videos = [] # For testing
logged_video_ids = [logged["id"] for logged in logged_videos]

# Walk the channel's videos in the order scrapetube yields them and collect
# everything that comes before the first already-logged video.
videos = scrapetube.get_channel(CHANNEL_ID)
new_videos = []
for video in videos:
    if video["videoId"] not in logged_video_ids:
        new_videos.append(video)
    else:
        break

# Flatten the raw scrapetube dicts into the compact video dicts.
new_videos = list(map(extract_video_dict_from_scrapetube, new_videos))

print(len(new_videos))
display(new_videos)

In [None]:
# Fetch up to 50 search hits for "ASMR" and flatten each one into a video dict.
search_results = scrapetube.get_search("ASMR", limit=50)

search_results = list(map(extract_video_dict_from_scrapetube, search_results))

display(search_results)

In [None]:
# Get date for the above video ids
import pandas as pd
from util.log_videos import extract_video_basics_by_page

# Build a DataFrame with one row per entry in search_results
df = pd.DataFrame(data=search_results)
# df  (uncomment to preview the frame)

In [None]:
# Look up each video's page and keep element [1] of the result as the date.
# NOTE(review): assumes extract_video_basics_by_page returns the date at index 1 — confirm.
df["date"] = df["id"].apply(lambda video_id: extract_video_basics_by_page(video_id)[1])

In [None]:
"""
A scrapetube video dict looks like the following:
{'videoId': 'h1MsqH5dxCU',
  'thumbnail': {'thumbnails': [{'url': 'https://i.ytimg.com/vi/h1MsqH5dxCU/hqdefault.jpg?sqp=-oaymwEbCKgBEF5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLC3YOZ-HqTsZdt9Ff7rcCWDolKJIQ',
     'width': 168,
     'height': 94},
    {'url': 'https://i.ytimg.com/vi/h1MsqH5dxCU/hqdefault.jpg?sqp=-oaymwEbCMQBEG5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLBEhMdLguiTw6-IestHXbKtSHRbGw',
     'width': 196,
     'height': 110},
    {'url': 'https://i.ytimg.com/vi/h1MsqH5dxCU/hqdefault.jpg?sqp=-oaymwEcCPYBEIoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBiaeXPJNNw4F-6lFhOXUGU-iRWew',
     'width': 246,
     'height': 138},
    {'url': 'https://i.ytimg.com/vi/h1MsqH5dxCU/hqdefault.jpg?sqp=-oaymwEcCNACELwBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLAVwZQbvNoCnYLV4QTVJo-xQ1dBAQ',
     'width': 336,
     'height': 188}]},
  'title': {'runs': [{'text': 'ASMR Hand Sounds but with some ✨ Chaotic ✨ Energy (Obviously)'}],
   'accessibility': {'accessibilityData': {'label': 'ASMR Hand Sounds but with some ✨ Chaotic ✨ Energy (Obviously) by ObviouslyASMR 4 days ago 30 minutes 23,441 views'}}},
  'descriptionSnippet': {'runs': [{'text': 'Quite happy with this one, I hope you enjoy :)\n\nGo ahead and stalk me:\nInstagram: ObviouslyASMR\nTwitter: ObviouslyLuuk\n\na little tipjar:\nPayPal: ObviouslyASMR@gmail.com\n\nA BIG thanks to my...'}]},
  'publishedTimeText': {'simpleText': '4 days ago'},
  'lengthText': {'accessibility': {'accessibilityData': {'label': '30 minutes, 56 seconds'}},
   'simpleText': '30:56'},
  'viewCountText': {'simpleText': '23,441 views'},
  'navigationEndpoint': {'clickTrackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7WhhVQ2hYb2dheUM1Mm1sUk9xLU43MV9mNWeaAQYQ8jgY4Ac=',
   'commandMetadata': {'webCommandMetadata': {'url': '/watch?v=h1MsqH5dxCU',
     'webPageType': 'WEB_PAGE_TYPE_WATCH',
     'rootVe': 3832}},
   'watchEndpoint': {'videoId': 'h1MsqH5dxCU',
    'watchEndpointSupportedOnesieConfig': {'html5PlaybackOnesieConfig': {'commonConfig': {'url': 'https://rr2---sn-uhvcpaxoa-guhe.googlevideo.com/initplayback?source=youtube&oeis=1&c=WEB&oad=3200&ovd=3200&oaad=11000&oavd=11000&ocs=700&oewis=1&oputc=1&ofpcc=1&beids=24350017&msp=1&odepv=1&id=87532ca87e5dc425&ip=87.214.8.179&initcwndbps=1138750&mt=1691573106&oweuc='}}}}},
  'ownerBadges': [{'metadataBadgeRenderer': {'icon': {'iconType': 'CHECK_CIRCLE_THICK'},
     'style': 'BADGE_STYLE_TYPE_VERIFIED',
     'tooltip': 'Verified',
     'trackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7',
     'accessibilityData': {'label': 'Verified'}}}],
  'trackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7QKWI9_KHlcuphwE=',
  'showActionMenu': False,
  'shortViewCountText': {'accessibility': {'accessibilityData': {'label': '23K views'}},
   'simpleText': '23K views'},
  'menu': {'menuRenderer': {'items': [{'menuServiceItemRenderer': {'text': {'runs': [{'text': 'Add to queue'}]},
       'icon': {'iconType': 'ADD_TO_QUEUE_TAIL'},
       'serviceEndpoint': {'clickTrackingParams': 'CPEBEP6YBBgHIhMI2IT9kqLPgAMVJER6BR1pvwD7',
        'commandMetadata': {'webCommandMetadata': {'sendPost': True}},
        'signalServiceEndpoint': {'signal': 'CLIENT_SIGNAL',
         'actions': [{'clickTrackingParams': 'CPEBEP6YBBgHIhMI2IT9kqLPgAMVJER6BR1pvwD7',
           'addToPlaylistCommand': {'openMiniplayer': True,
            'videoId': 'h1MsqH5dxCU',
            'listType': 'PLAYLIST_EDIT_LIST_TYPE_QUEUE',
            'onCreateListCommand': {'clickTrackingParams': 'CPEBEP6YBBgHIhMI2IT9kqLPgAMVJER6BR1pvwD7',
             'commandMetadata': {'webCommandMetadata': {'sendPost': True,
               'apiUrl': '/youtubei/v1/playlist/create'}},
             'createPlaylistServiceEndpoint': {'videoIds': ['h1MsqH5dxCU'],
              'params': 'CAQ%3D'}},
            'videoIds': ['h1MsqH5dxCU']}}]}},
       'trackingParams': 'CPEBEP6YBBgHIhMI2IT9kqLPgAMVJER6BR1pvwD7'}},
     {'menuServiceItemDownloadRenderer': {'serviceEndpoint': {'clickTrackingParams': 'CPABENGqBRgIIhMI2IT9kqLPgAMVJER6BR1pvwD7',
        'offlineVideoEndpoint': {'videoId': 'h1MsqH5dxCU',
         'onAddCommand': {'clickTrackingParams': 'CPABENGqBRgIIhMI2IT9kqLPgAMVJER6BR1pvwD7',
          'getDownloadActionCommand': {'videoId': 'h1MsqH5dxCU',
           'params': 'CAI%3D'}}}},
       'trackingParams': 'CPABENGqBRgIIhMI2IT9kqLPgAMVJER6BR1pvwD7'}},
     {'menuServiceItemRenderer': {'text': {'runs': [{'text': 'Share'}]},
       'icon': {'iconType': 'SHARE'},
       'serviceEndpoint': {'clickTrackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7',
        'commandMetadata': {'webCommandMetadata': {'sendPost': True,
          'apiUrl': '/youtubei/v1/share/get_share_panel'}},
        'shareEntityServiceEndpoint': {'serializedShareEntity': 'CgtoMU1zcUg1ZHhDVQ%3D%3D',
         'commands': [{'clickTrackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7',
           'openPopupAction': {'popup': {'unifiedSharePanelRenderer': {'trackingParams': 'CO8BEI5iIhMI2IT9kqLPgAMVJER6BR1pvwD7',
              'showLoadingSpinner': True}},
            'popupType': 'DIALOG',
            'beReused': True}}]}},
       'trackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7'}}],
    'trackingParams': 'COwBENwwIhMI2IT9kqLPgAMVJER6BR1pvwD7',
    'accessibility': {'accessibilityData': {'label': 'Action menu'}}}},
  'thumbnailOverlays': [{'thumbnailOverlayTimeStatusRenderer': {'text': {'accessibility': {'accessibilityData': {'label': '30 minutes, 56 seconds'}},
      'simpleText': '30:56'},
     'style': 'DEFAULT'}},
   {'thumbnailOverlayToggleButtonRenderer': {'isToggled': False,
     'untoggledIcon': {'iconType': 'WATCH_LATER'},
     'toggledIcon': {'iconType': 'CHECK'},
     'untoggledTooltip': 'Watch later',
     'toggledTooltip': 'Added',
     'untoggledServiceEndpoint': {'clickTrackingParams': 'CO4BEPnnAxgCIhMI2IT9kqLPgAMVJER6BR1pvwD7',
      'commandMetadata': {'webCommandMetadata': {'sendPost': True,
        'apiUrl': '/youtubei/v1/browse/edit_playlist'}},
      'playlistEditEndpoint': {'playlistId': 'WL',
       'actions': [{'addedVideoId': 'h1MsqH5dxCU',
         'action': 'ACTION_ADD_VIDEO'}]}},
     'toggledServiceEndpoint': {'clickTrackingParams': 'CO4BEPnnAxgCIhMI2IT9kqLPgAMVJER6BR1pvwD7',
      'commandMetadata': {'webCommandMetadata': {'sendPost': True,
        'apiUrl': '/youtubei/v1/browse/edit_playlist'}},
      'playlistEditEndpoint': {'playlistId': 'WL',
       'actions': [{'action': 'ACTION_REMOVE_VIDEO_BY_VIDEO_ID',
         'removedVideoId': 'h1MsqH5dxCU'}]}},
     'untoggledAccessibility': {'accessibilityData': {'label': 'Watch later'}},
     'toggledAccessibility': {'accessibilityData': {'label': 'Added'}},
     'trackingParams': 'CO4BEPnnAxgCIhMI2IT9kqLPgAMVJER6BR1pvwD7'}},
   {'thumbnailOverlayToggleButtonRenderer': {'untoggledIcon': {'iconType': 'ADD_TO_QUEUE_TAIL'},
     'toggledIcon': {'iconType': 'PLAYLIST_ADD_CHECK'},
     'untoggledTooltip': 'Add to queue',
     'toggledTooltip': 'Added',
     'untoggledServiceEndpoint': {'clickTrackingParams': 'CO0BEMfsBBgDIhMI2IT9kqLPgAMVJER6BR1pvwD7',
      'commandMetadata': {'webCommandMetadata': {'sendPost': True}},
      'signalServiceEndpoint': {'signal': 'CLIENT_SIGNAL',
       'actions': [{'clickTrackingParams': 'CO0BEMfsBBgDIhMI2IT9kqLPgAMVJER6BR1pvwD7',
         'addToPlaylistCommand': {'openMiniplayer': True,
          'videoId': 'h1MsqH5dxCU',
          'listType': 'PLAYLIST_EDIT_LIST_TYPE_QUEUE',
          'onCreateListCommand': {'clickTrackingParams': 'CO0BEMfsBBgDIhMI2IT9kqLPgAMVJER6BR1pvwD7',
           'commandMetadata': {'webCommandMetadata': {'sendPost': True,
             'apiUrl': '/youtubei/v1/playlist/create'}},
           'createPlaylistServiceEndpoint': {'videoIds': ['h1MsqH5dxCU'],
            'params': 'CAQ%3D'}},
          'videoIds': ['h1MsqH5dxCU']}}]}},
     'untoggledAccessibility': {'accessibilityData': {'label': 'Add to queue'}},
     'toggledAccessibility': {'accessibilityData': {'label': 'Added'}},
     'trackingParams': 'CO0BEMfsBBgDIhMI2IT9kqLPgAMVJER6BR1pvwD7'}},
   {'thumbnailOverlayNowPlayingRenderer': {'text': {'runs': [{'text': 'Now playing'}]}}}],
  'richThumbnail': {'movingThumbnailRenderer': {'movingThumbnailDetails': {'thumbnails': [{'url': 'https://i.ytimg.com/an_webp/h1MsqH5dxCU/mqdefault_6s.webp?du=3000&sqp=CO6qzaYG&rs=AOn4CLBdWxJL-uFuxDcVzR5hP_mT8NxDrA',
       'width': 320,
       'height': 180}],
     'logAsMovingThumbnail': True},
    'enableHoveredLogging': True,
    'enableOverlay': True}}}
"""
