# Demo: Load from YouTube and forward to Graph Universe

In [None]:
# imports
from pytube import YouTube, Playlist, Channel
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_community.document_loaders import YoutubeLoader
from guvcli.constants import *

# configure
# Upper bound on the number of playlist entries handled by the playlist loop.
MAX_VIDEO = 10
# Playlist to import, given as a watch URL that carries the playlist id in
# its `list` query parameter.
YOUTUBE_PLAYLIST = "https://www.youtube.com/watch?v=GUh7QyCwxGk&list=PL6RpkC85SLQCGjMBsoQYlMrLmbZaEWM6U"


In [None]:
# load playlist
# Wrap the configured URL in a pytube Playlist; later cells iterate it and
# read its owner.
playlist = Playlist(YOUTUBE_PLAYLIST)
print(playlist)
# Bare expression on purpose: as the last statement of a notebook cell its
# value is shown as the cell output (it has no effect in a plain script).
playlist.owner


In [None]:
# loop playlist
# Builds `prepared_docs`: one dict per loaded transcript document, keyed by
# the FIELD_* constants, ready to be handed to the video object manager.
from itertools import islice

prepared_docs = []

# islice caps the iteration at MAX_VIDEO entries without a hand-maintained
# counter; max(..., 0) keeps a negative limit meaning "process nothing".
for video_url in islice(playlist, max(MAX_VIDEO, 0)):
    # load video infos
    yt = YouTube(video_url)

    loader = YoutubeLoader.from_youtube_url(
        youtube_url=video_url,
        add_video_info=False,
        language=["en", "de"],
        translation="en",
        # Optional chunked transcripts:
        #transcript_format=TranscriptFormat.CHUNKS,
        #chunk_size_seconds=30,
    )
    documents = loader.load()

    # Read the thumbnail once per video and reuse it for the output and for
    # every prepared document below.
    thumbnail_url = yt.thumbnail_url

    # output
    print(thumbnail_url)
    print(yt.metadata)
    print(video_url)
    print(documents)

    # add to prepared
    for doc in documents:
        source = doc.metadata.get("source")
        prepared_docs.append({
            FIELD_KEY: video_url,
            FIELD_TITLE: f"YTVideo {source}",
            FIELD_EMB_DOCUMENT: doc.page_content,
            FIELD_IMAGE_URL: thumbnail_url,
            FIELD_LINK_URL: video_url,
        })

In [None]:
# import libraries 
import os
import random
import datetime as dt
from dotenv import load_dotenv
from guvcli import GUV

# prepare
# Load settings from a local .env file (python-dotenv) before reading them.
load_dotenv()

# os.getenv returns None when a variable is not set.
GUV_ENDPOINT        = os.getenv("GUV_ENDPOINT")
GUV_UNIVERSE        = os.getenv("GUV_UNIVERSE")

# Keys of the object types registered further down.
YT_OWNER_TYPE       = "YTOwner"
YT_PLAYLIST_TYPE    = "YTPlaylist"
YT_VIDEO_TYPE       = "YTVideo"

# prepare universe access
connector = GUV.connect(GUV_ENDPOINT, GUV_UNIVERSE)
if not connector.is_valid():
    print("GUV connection failed")
    # Raise SystemExit directly instead of calling exit(): `exit` is only a
    # helper injected by the `site` module, and IPython/Jupyter replaces it
    # with an object that requests a shutdown without stopping the running
    # cell, so the statements below would still run against an invalid
    # connector.
    raise SystemExit(1)

print("GUV connected: ", connector.info())


# create object types
# One type each for the channel (owner), the playlist and the video. The
# generator performs the three registrations one after another, in the order
# listed, while its results are unpacked into the three names.
tobj_ytc, tobj_ytp, tobj_ytv = (
    connector.object_types().create_object_type({"key": type_key, "title": type_title})
    for type_key, type_title in (
        (YT_OWNER_TYPE, "Youtube Kanal"),
        (YT_PLAYLIST_TYPE, "Youtube Playlist"),
        (YT_VIDEO_TYPE, "Youtube Video"),
    )
)


# create relation types
# Two relation types: playlist -> channel and video -> playlist. Each row
# holds (key, title, title_outbound, title_inbound); the registrations run
# in the listed order while the results are unpacked.
trel_is_pl_of, trel_is_in_pl = (
    connector.relation_types().create_relation_type(
        dict(
            key=rel_key,
            title=rel_title,
            title_outbound=outbound_title,
            title_inbound=inbound_title,
        )
    )
    for rel_key, rel_title, outbound_title, inbound_title in (
        (
            "isYTPlaylistOf",
            "Youtube Playlist vom Kanal",
            "ist Playlist von",
            "hat Playlist",
        ),
        (
            "isInYTPlaylist",
            "Youtube Video enthalten in Playlist",
            "ist Video der Playlist",
            "hat Videos in Playlist",
        ),
    )
)


In [None]:
# check
# Nothing to forward when the playlist is empty or no document was prepared.
if not playlist or not prepared_docs:
    print("no video")
    # Raise SystemExit directly instead of calling exit(): `exit` is only a
    # helper injected by the `site` module, and IPython/Jupyter replaces it
    # with an object that does not stop the running cell, so the node
    # creation below would still be executed.
    raise SystemExit(0)

# create or update owner node
# The channel is keyed by the playlist owner's name.
yt_owner_mgr = connector.objects(YT_OWNER_TYPE)
yt_owner_rec = yt_owner_mgr.create_object(
    dict(
        key=f"{playlist.owner}",
        title=f"Youtube Kanal {playlist.owner}",
    )
)

# create or update playlist node
# The playlist is keyed by its configured URL.
yt_playlist_mgr = connector.objects(YT_PLAYLIST_TYPE)
yt_playlist_rec = yt_playlist_mgr.create_object(
    dict(
        key=f"{YOUTUBE_PLAYLIST}",
        title=f"Youtube Playlist {YOUTUBE_PLAYLIST}",
    )
)

# create relation between playlist and owner
# Direction: playlist (source) -> owner (target).
irel_pl_owner = connector.relation_types().create_relation_for(
    type_key=trel_is_pl_of.get_value_key(),
    source=yt_playlist_rec,
    target=yt_owner_rec,
    data=dict(title=f"Playlist Owner Relation for {YOUTUBE_PLAYLIST}"),
)

# loop all docs and create videos and relation
# Each prepared document becomes a video node that is linked to the playlist
# node (video = source, playlist = target).
yt_video_mgr = connector.objects(YT_VIDEO_TYPE)
for doc in prepared_docs:
    yt_video_rec = yt_video_mgr.create_object(doc)
    if not yt_video_rec:
        # A falsy result is treated as "not created" (same test as before),
        # but the skip is now reported instead of passing silently.
        print(f"Video {doc.get(FIELD_KEY)} could not be created, relation skipped")
        continue

    irel_video_pl = connector.relation_types().create_relation_for(
        type_key=trel_is_in_pl.get_value_key(),
        source=yt_video_rec,
        target=yt_playlist_rec,
        data={
            "title": f"Video is in playlist {YOUTUBE_PLAYLIST}"
        }
    )
    print(f"Video {yt_video_rec.get_value_key()} created and linked to playlist: {irel_video_pl}")
    