# UI for Acquiring Data

Uses a Jupyter Notebook for simplicity. Finds a list of YouTube video IDs and their transcript languages to create a dataset for use in this experiment.

In [None]:
from youtubesearchpython import Hashtag
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api import TranscriptsDisabled
from ipywidgets import widgets, Layout
from IPython.display import display, Javascript
import asyncio
import csv

In [None]:

class VideoSearchContainer:
    """One search result plus the widget row used to pick its transcript language.

    On construction the available transcripts for the video are looked up via
    ``YouTubeTranscriptApi.list_transcripts``. ``getRow`` builds the widget row
    and ``getValues`` turns the current selection into a plain dict with the
    keys ``id``, ``useable`` and ``lang``.
    """

    # Extra toggle option that lets the user reject a video outright.
    NOT_USED = 'Do Not Use'

    def __init__(self, inputObj):
        """Store the search result fields and fetch the transcript languages.

        Args:
            inputObj: mapping with at least the keys ``id``, ``title``,
                ``type`` and ``link`` (one entry of the search result list).
        """
        # Assigned first so close()/__del__ stay safe if a lookup below raises.
        self.box = None
        self.id = inputObj['id']
        self.title = inputObj['title']
        self.isVideo = inputObj['type'] == 'video'
        self.link = inputObj['link']

        try:
            self.langs = [
                {'language_code': x.language_code, 'language': x.language}
                for x in YouTubeTranscriptApi.list_transcripts(self.id)
            ]
        except TranscriptsDisabled:
            # A video without transcripts is kept, but with no languages.
            self.langs = []

    @property
    def valid(self):
        """True when the result is a video with at least one transcript."""
        return self.isVideo and len(self.langs) > 0

    @property
    def hasEnglish(self):
        """True when some transcript is English (``en`` or an ``en-*`` variant).

        The code is compared as a language tag rather than by substring, so
        codes that merely contain the letters "en" are not treated as English.
        """
        return any(
            lang['language_code'] == 'en'
            or lang['language_code'].startswith('en-')
            for lang in self.langs
        )

    @property
    def boxable(self):
        """True when the video should be offered to the user for selection."""
        return self.valid and self.hasEnglish

    def display(self):
        """Build the widget row and render it in the notebook output."""
        display(self.getRow())

    def getRow(self):
        """Create, remember and return the widget row for this video.

        The row holds a button labelled with the title that opens the video
        link, followed by either a language toggle (plus ``NOT_USED``) or a
        label saying the entry is not valid.
        """
        import json

        wl = [widgets.Button(description=self.title, layout=Layout(width='50%'))]

        def openLink(obj):
            # json.dumps emits a properly escaped JavaScript string literal,
            # so quotes or backslashes in the link cannot break the script.
            display(Javascript(f'window.open({json.dumps(self.link)});'))

        wl[0].on_click(openLink)

        if self.valid:
            wl += [widgets.ToggleButtons(options=[i['language'] for i in self.langs] + [VideoSearchContainer.NOT_USED])]
        else:
            wl += [widgets.Label("Not a valid object")]
        self.box = widgets.Box(wl)
        return self.box

    @property
    def _box(self):
        """The second child of the row: the language toggle (or the label)."""
        return self.box.children[1]

    def getValues(self):
        """Return the user's decision as ``{'id', 'useable', 'lang'}``.

        ``useable`` is False (and ``lang`` empty) when the video is not
        boxable or the ``NOT_USED`` option is selected; otherwise ``lang`` is
        the language code at the selected toggle index.
        """
        out = {'id': self.id, 'useable': None, 'lang': ''}

        # The boxable check comes first so that non-boxable videos never touch
        # the widget row, which may not have been built for them.
        if not self.boxable or self._box.value == VideoSearchContainer.NOT_USED:
            out['useable'] = False
        else:
            out['useable'] = True
            out['lang'] = self.langs[self._box.index]['language_code']

        return out

    def __del__(self):
        """Close the widget row, if any, when the container is collected."""
        self.close()

    def close(self):
        """Close the widget row and forget it; safe to call repeatedly."""
        # getattr guard: the attribute may be missing if __init__ never ran.
        if getattr(self, 'box', None) is not None:
            self.box.close()
            self.box = None

async def getNxt(search):
    """Collect the next page of selectable videos from ``search``.

    Each batch of results is wrapped in VideoSearchContainer objects. Boxable
    containers are gathered for display; every other container is recorded in
    the global ``videoList`` immediately (its getValues() marks it unusable).
    Batches are pulled until at least PAGE_LENGTH boxable videos are gathered
    or ``search.next()`` returns a falsy value.

    Returns:
        list of boxable VideoSearchContainer objects. It may be empty, and it
        may hold more than PAGE_LENGTH entries because whole batches are added.

    The body contains no ``await``, so once started it runs to completion
    without yielding to the event loop.

    NOTE(review): ``search.next()`` is called before the first
    ``search.result()``. If the search object already holds a page of results
    when it is constructed, that first page is never read - confirm against
    the youtubesearchpython behaviour.
    """
    global videoList

    selectable = []
    more_results = search.next()

    while more_results and len(selectable) < PAGE_LENGTH:
        batch = [VideoSearchContainer(videoData) for videoData in search.result()['result']]

        for candidate in batch:
            if candidate.boxable:
                selectable.append(candidate)

        # Rejected entries go straight into the output so they are not lost.
        for candidate in batch:
            if not candidate.boxable:
                videoList.append(candidate.getValues())

        # Only advance the search while the page still has room.
        if len(selectable) < PAGE_LENGTH:
            more_results = search.next()

    return selectable

def getFutureNxt(search):
    """Schedule ``getNxt(search)`` as an asyncio task and return that task.

    Must be called while an event loop is running, as asyncio.create_task
    requires one.
    """
    pending_page = getNxt(search)
    return asyncio.create_task(pending_page)

def onePage(videos, callback):
    """Display one page of video rows followed by "Next" and "Done" buttons.

    Args:
        videos: containers providing getRow() and getValues().
        callback: called as ``callback(values, finished)`` when a button is
            clicked, where ``values`` is the list of getValues() results and
            ``finished`` is False for "Next" and True for "Done".
    """
    next_button = widgets.Button(description="Next")
    done_button = widgets.Button(description="Done")

    rows = [video.getRow() for video in videos]
    page = widgets.VBox(rows + [next_button, done_button])

    display(page)

    def make_handler(finished):
        """Build the click handler for a button; both differ only in the flag."""
        def handler(obj):
            # Tear down the page before handing the selections on.
            for widget in (page, next_button, done_button):
                widget.close()
            callback([video.getValues() for video in videos], finished)
        return handler

    next_button.on_click(make_handler(False))
    done_button.on_click(make_handler(True))

def pageDone(values, done=False):
    """Record a finished page and, unless done, show the next one.

    Args:
        values: list of per-video result dicts from the page just closed;
            appended to the global ``videoList``.
        done: when True, only record the values and stop paging.

    The next page is taken from the global task ``nxt``. After it is shown,
    ``nxt`` is replaced with a new task for the following page.
    """
    global videoList, nxt, search

    videoList.extend(values)

    if done:
        return

    # Task.result() raises if the prefetch task has not finished or it failed.
    upcoming = nxt.result()

    if not upcoming:
        print("No more items in search")
        return

    onePage(upcoming, pageDone)

    # Start preparing the page after this one.
    nxt = getFutureNxt(search)


# Begin Selection Process

Run the cell below and use the UI elements to select the appropriate YouTube videos.

In [None]:
# Minimum number of selectable videos getNxt gathers before it stops paging.
PAGE_LENGTH = 30
# Hashtag search (youtubesearchpython) that supplies the candidate videos.
search = Hashtag('StandUp', language = 'en', limit=30)

# Accumulates one {'id', 'useable', 'lang'} dict per video processed so far;
# getNxt and pageDone both append to it.
videoList = []

# vs = [VideoSearchContainer(videoData) for videoData in search.result()['result']]

# Gather the first page. The top-level await needs a notebook/IPython session
# with a running event loop; it is not valid in a plain Python script.
vs = await getNxt(search)

# Show the first page; pageDone is invoked when "Next" or "Done" is clicked.
onePage(vs, pageDone)

# Schedule the following page as a task; pageDone reads it via nxt.result().
nxt = getFutureNxt(search)


## Run when you have completed the selection process

In [None]:
def toCSV(videos, path='videos.csv'):
    """Write the collected video records to a CSV file.

    Args:
        videos: list of dicts that all share the same keys. The keys of the
            first dict become the header row and fix the column order.
        path: destination file. Defaults to 'videos.csv' in the working
            directory. The file is overwritten if it exists.

    An empty ``videos`` list produces an empty file rather than raising
    IndexError, so a run with no selections does not leave stale output.
    """
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as output_file:
        if not videos:
            # No record to take column names from; leave the file empty.
            return

        dict_writer = csv.DictWriter(output_file, fieldnames=list(videos[0]))
        dict_writer.writeheader()
        dict_writer.writerows(videos)

# Save every decision accumulated in videoList during the selection process.
toCSV(videoList)