In [109]:
from flask import Flask, render_template, request, jsonify
import threading, uuid
from datetime import datetime
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings, OpenAIEmbeddings 
from langchain.chains.summarize import load_summarize_chain
from langchain.vectorstores import Chroma,FAISS
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import re
import random, time
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
from langchain.indexes import VectorstoreIndexCreator
from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableParallel,RunnablePassthrough
from operator import itemgetter
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import JSONLoader
from langchain_community.chat_models import ChatOpenAI

In [19]:
def get_video_id_from_url(url):
    """Extract the YouTube video ID from a watch URL or a short link.

    Two URL shapes are recognised:

    * ``...watch?v=<id>`` -- the ID is the text following ``v=`` up to the
      next ``&`` or ``#``.
    * ``...youtu.be/<id>`` -- the ID is the text following ``be/`` up to the
      next ``?``, ``&`` or ``#``.

    Args:
        url: The video URL as a string. ``None`` or an empty string is
            treated as "no ID found".

    Returns:
        The video ID string, or ``None`` when neither pattern matches.
    """
    if not url:
        return None

    video_id = re.search(r'(?<=v=)[^&#]+', url)
    if video_id is None:
        # Short links carry their parameters after "?" (e.g. "?t=42"), so "?"
        # has to terminate the ID as well; otherwise the query string would be
        # returned as part of the ID.
        video_id = re.search(r'(?<=be/)[^?&#]+', url)
    return video_id.group(0) if video_id else None

def save_transcript_to_file(video_url, output_file, session_id):
    """Download a video's transcript, write its text to a file and index its start times.

    The caption texts are written to ``output_file`` separated by single
    spaces, and a ``{caption text: start}`` mapping is stored in the
    module-level ``transcripts_with_timestamps`` under ``session_id``.

    Failures are reported with ``print`` and the function returns early; in
    that case no file is written and no entry is created for ``session_id``.

    Args:
        video_url: URL of the YouTube video.
        output_file: Path of the text file to (over)write, encoded as UTF-8.
        session_id: Key under which the timestamp mapping is stored.

    Returns:
        None.
    """
    import os  # local import so this function does not depend on another cell's imports

    video_id = get_video_id_from_url(video_url)
    if video_id is None:
        print("Invalid YouTube URL")
        return

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    # open(..., "w") does not create missing parent directories, so make sure
    # the target directory exists before writing.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    local_timestamps = transcripts_with_timestamps[session_id] = {}
    with open(output_file, "w", encoding="utf-8") as f:
        for entry in transcript:
            f.write(entry["text"] + " ")
            # Keep the FIRST start time for a caption text that repeats.
            # Plain assignment would keep the first occurrence's position in
            # the dict but silently replace its time with the last one.
            local_timestamps.setdefault(entry['text'], entry['start'])

def get_timestamp(session_id, sentence):
    """Return the start time of the first stored caption that contains ``sentence``.

    Captions are looked up in ``transcripts_with_timestamps[session_id]`` and
    scanned in insertion order; the match is a plain substring test.

    Args:
        session_id: Session whose caption/start mapping should be searched.
        sentence: Text fragment to look for inside a caption.

    Returns:
        The matching caption's start value truncated to ``int``, or ``None``
        when ``sentence`` is empty, the session is unknown, or no caption
        contains the fragment.
    """
    if not sentence:
        # "" is a substring of every caption and would always "match" the
        # first entry, which is never a meaningful answer.
        return None

    # An unknown session is a "not found" case, not an error.
    session_timestamps = transcripts_with_timestamps.get(session_id, {})
    for transcript_sentence, timestamp in session_timestamps.items():
        if sentence in transcript_sentence:
            return int(timestamp)
    return None

In [20]:
def save_transcript():
    """Fetch a video's transcript and build a retrieval-QA chain for a new session.

    Reads ``video_url`` from the submitted form, saves the transcript to a
    timestamp-named file under ``transcripts/``, splits it into chunks,
    embeds the chunks into a Chroma store and registers the resulting
    ``RetrievalQA`` chain in ``indexes`` under a freshly generated session id.
    The video URL (cut at its first ``&``) is remembered in ``video_urls``.
    Timing information for each stage is printed.

    Returns:
        A JSON response carrying the new ``session_id`` on success, or a
        ``(message, 400)`` tuple when no transcript could be saved.
    """
    start_time = time.time()  # Start the timer
    session_id = str(uuid.uuid4())  # Generate a unique ID for this session
    transcript_file = f"transcripts/{datetime.now().strftime('%Y%m%d%H%M%S')}_{random.randint(0,100)}.txt"
    video_url = request.form['video_url']
    video_urls[session_id] = video_url.split("&")[0]
    save_transcript_to_file(video_url, transcript_file, session_id)
    print(f"Save transcript took {time.time() - start_time} seconds to execute.")

    # save_transcript_to_file reports failures by printing and returning, and
    # only registers the session's timestamps once a transcript was fetched.
    # Without this check the loader below would crash on a missing file.
    if session_id not in transcripts_with_timestamps:
        del video_urls[session_id]
        return "Could not load transcript", 400

    start_time = time.time()  # Start the timer
    # The transcript file is written as UTF-8; read it back the same way
    # instead of relying on the platform default encoding.
    loader = TextLoader(transcript_file, encoding="utf-8")
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    print(f"Split QA texts took {time.time() - start_time} seconds to execute.")
    print(len(texts))

    start_time = time.time()  # Start the timer
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    docsearch = Chroma.from_documents(texts, embeddings)
    qa = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={'k': 5, 'fetch_k': 50, 'score_threshold': 0.7},
        ),
        return_source_documents=True,
    )

    indexes[session_id] = qa
    print(f"QA Embedding took {time.time() - start_time} seconds to execute.")

    # NOTE(review): the session id was previously generated but never handed
    # back, although query() requires the client to send it. Confirm the
    # response shape expected by the front end.
    return jsonify({'session_id': session_id})

In [21]:
def query():
    """Answer a question about a previously loaded transcript.

    Reads ``session_id`` and ``query`` from the submitted form, runs the
    session's QA chain from ``indexes`` and, for every returned source
    document, tries to resolve a start time from the first 20 characters of
    its content in order to build a time-stamped link to the video.

    Returns:
        A JSON response with ``result`` (the chain's answer) and
        ``clip_links`` (unique links, in retrieval order), or
        ``("No transcript loaded", 400)`` when the session id is unknown.
    """
    start_time = time.time()  # Start the timer

    session_id = request.form['session_id']  # The client must send the session ID with each request
    if session_id not in indexes:
        return "No transcript loaded", 400

    user_query = request.form['query']
    # invoke() replaces the deprecated direct call on the chain object.
    output = indexes[session_id].invoke({"query": user_query})

    result = output["result"]
    print(f"Answering took {time.time() - start_time} seconds to execute.")

    base_url = video_urls[session_id]
    # The stored URL only has a query string for "watch?v=..." links; a short
    # link has none, so the time parameter must then be introduced with "?".
    separator = "&" if "?" in base_url else "?"

    clip_links = []
    for doc in output["source_documents"]:
        timestamp = get_timestamp(session_id, doc.page_content[:20])
        if timestamp is None:
            continue
        link = f"{base_url}{separator}t={timestamp}s"
        # The retriever can return the same passage more than once; do not
        # repeat its link.
        if link not in clip_links:
            clip_links.append(link)

    return jsonify({'result': result, 'clip_links': clip_links})

In [22]:
# Sanity check: a standard watch URL should yield the value of its "v" parameter.
get_video_id_from_url("https://www.youtube.com/watch?v=dtp6b76pMak")

'dtp6b76pMak'

In [23]:
# Fetch the sample video's transcript; later cells iterate it and read each
# entry's "text" and "start" keys.
transcript = YouTubeTranscriptApi.get_transcript("dtp6b76pMak")

In [24]:
# Dump the caption texts (space separated) to transcript.txt and remember the
# start value recorded for each caption text.
local_timestamps = {}
with open("transcript.txt", "w", encoding="utf-8") as transcript_out:
    for caption in transcript:
        caption_text = caption["text"]
        transcript_out.write(caption_text + " ")
        local_timestamps[caption_text] = caption['start']

In [27]:
# Load the saved transcript and cut it into chunks of at most 1000 characters
# with no overlap, ready for embedding.
transcript_file = "transcript.txt"
# transcript.txt is written as UTF-8, so read it back with the same encoding
# rather than the platform default.
loader = TextLoader(transcript_file, encoding="utf-8")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)


In [146]:
import os
from getpass import getpass

# Never overwrite an already configured key (and never store one in the
# notebook): prompt for it only when the environment does not provide it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(texts, embeddings)
# gpt-3.5-turbo is a chat model, so it is driven through ChatOpenAI (as the
# other cells do) instead of the completion-style OpenAI wrapper.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo"),
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)



In [69]:
# Embed the transcript chunks into a Chroma store. The API key is taken from
# the OPENAI_API_KEY environment variable rather than from an (empty)
# hard-coded constructor argument.
vectordb = Chroma.from_documents(texts, OpenAIEmbeddings())

# Prompt that frames the model as a YouTube/keyword specialist answering from
# the transcript supplied as {context}.
template = ChatPromptTemplate.from_template("""
You are a helpful AI Social media expert. You specialize in YouTube and YouTube keywords. You're an expert at writing descriptions, titles, and you use keywords that help with video discovery. You are given the transcript of a video as context. Answer the questions based on the given context:
{context}

Question: {question}
""")


In [90]:

# Chat model handle with temperature 0.
# NOTE(review): no cell visible here reads `llm`; the chains build their own ChatOpenAI.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)



In [197]:
# Retrieval-QA chain ("stuff" chain type) over `vectordb` that also returns
# the documents the answer was drawn from.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

In [198]:
# Ask the chain a question. invoke() is the supported entry point; calling the
# chain object directly is deprecated.
output = qa.invoke({"query":" what are the video about"})


In [199]:
# Show the answer text produced by the chain.
output['result']

'The videos are about using the Vision Pro.'

In [200]:
# Show the retrieved documents that were returned alongside the answer.
output['source_documents']

[Document(page_content='through a YouTube video.', metadata={'seq_num': 205, 'source': '/Users/junwei/Desktop/InsightFlow/ChatWithVideo/transcript.json', 'start': 480.75}),
 Document(page_content='through a YouTube video.', metadata={'seq_num': 205, 'source': '/Users/junwei/Desktop/InsightFlow/ChatWithVideo/transcript.json', 'start': 480.75}),
 Document(page_content='through a YouTube video.', metadata={'seq_num': 205, 'source': '/Users/junwei/Desktop/InsightFlow/ChatWithVideo/transcript.json', 'start': 480.75}),
 Document(page_content='This video is all about\nusing the Vision Pro.', metadata={'seq_num': 20, 'source': '/Users/junwei/Desktop/InsightFlow/ChatWithVideo/transcript.json', 'start': 52.38})]

In [213]:
import json
from pathlib import Path
from pprint import pprint

def save_to_json(transcript,filename):
    """Serialise ``transcript`` as JSON (4-space indent) and write it to ``filename``.

    Args:
        transcript: Any JSON-serialisable object.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w') as json_file:
        json_file.write(json.dumps(transcript, indent=4))

def load_documents(filepath):
    """Load a JSON file into documents, one per element selected by the ``.[]`` schema.

    Each element's ``"text"`` value becomes the document content. Its
    ``"start"`` value (``None`` when absent) is copied into the metadata, and
    so is ``"end"`` when the element has that key.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The documents produced by ``JSONLoader.load()``.
    """
    def _copy_timing(record: dict, metadata: dict) -> dict:
        # Carry the element's timing fields over into the document metadata.
        metadata["start"] = record.get("start")
        if "end" in record:
            metadata["end"] = record["end"]
        return metadata

    return JSONLoader(
        file_path=filepath,
        jq_schema='.[]',
        text_content=True,
        content_key="text",
        metadata_func=_copy_timing,
    ).load()

# Fetch the sample transcript again, persist it as JSON, then load it back as
# documents whose metadata carries each entry's "start" value.
transcript = YouTubeTranscriptApi.get_transcript("dtp6b76pMak")
save_to_json(transcript,"transcript.json")
documents = load_documents("transcript.json")
        

In [201]:
# Build a store from the JSON-loaded documents and a QA chain that retrieves
# from it with the retriever's default search settings.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(documents, embeddings)
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    # Retrieve from the store built just above; the chain was previously
    # wired to `vectordb`, leaving `vectorstore` unused.
    # NOTE(review): the recorded source_documents output contains repeated
    # hits, which would be consistent with several stores sharing one default
    # Chroma collection -- consider a distinct collection_name; confirm.
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)



In [228]:
# Display `transcript`.
# NOTE(review): the recorded output is a plain string, so this cell ran after
# `transcript` had been rebound to the contents of transcript.txt (execution
# count 227 precedes 228) rather than to the list returned by the API.
transcript

"(upbeat music) - All right, so you've seen the unboxing. Now it's time for the breakdown. What is using the Apple\nVision Pro actually like? This is easily one of Apple's\ncraziest, most radical, possibly dystopian products of all time. And I have a lot of thoughts here, like I've been using it\nfor about a week now. There are some parts of this thing that are absolutely incredible, and some other parts that feel weird, or borderline unfinished. There are all kinds of new technologies, from a new operating system\nto infrared eye tracking to virtually reconstructed\nversions of you. I feel like there are so\nmany actually new things that you have to understand\nin order to get a sense of what this headset\nactually is and what it does. So I'm gonna break this\ndown into two parts. This video is all about\nusing the Vision Pro. It's everything I've\nlearned from the past week of wearing and getting used to\nthis thing every single day. But I'm also working\non a more wide ranging, poss

In [227]:
# Re-read the saved transcript as a single string. transcript.txt is written
# as UTF-8, so it is decoded as UTF-8 instead of the platform default.
# Note that this rebinds `transcript` from the API's list of entries to a str.
with open("transcript.txt", encoding="utf-8") as f:
    transcript = f.read()



In [236]:
from langchain_experimental.text_splitter import SemanticChunker

# Three sample paragraphs used to try out semantic chunking.
t1 = "Data science is the study of data to extract meaningful insights for business. It is a multidisciplinary approach that combines principles and practices from the fields of mathematics, statistics, artificial intelligence, and computer engineering to analyze large amounts of data. This analysis helps data scientists to ask and answer questions like what happened, why it happened, what will happen, and what can be done with the results."

t2 = "Data science is important because it combines tools, methods, and technology to generate meaning from data. Modern organizations are inundated with data; there is a proliferation of devices that can automatically collect and store information. Online systems and payment portals capture more data in the fields of e-commerce, medicine, finance, and every other aspect of human life. We have text, audio, video, and image data available in vast quantities."

t3 = "Descriptive analysis examines data to gain insights into what happened or what is happening in the data environment. It is characterized by data visualizations such as pie charts, bar charts, line graphs, tables, or generated narratives. For example, a flight booking service may record data like the number of tickets booked each day. Descriptive analysis will reveal booking spikes, booking slumps, and high-performing months for this service."


# Split each paragraph with an embedding-based chunker; one metadata dict is
# supplied per paragraph, in the same order as the texts.
text_splitter = SemanticChunker(OpenAIEmbeddings())
docs = text_splitter.create_documents(
    texts=[t1, t2, t3],
    metadatas=[
        {"start": 1, "end": 2},
        {"start": 3, "end": 4},
        {"start": 5, "end": 6},
    ],
)

In [237]:
# Display the resulting chunks; in the recorded output each chunk carries the
# metadata dict of the paragraph it was cut from.
docs

[Document(page_content='Data science is the study of data to extract meaningful insights for business. It is a multidisciplinary approach that combines principles and practices from the fields of mathematics, statistics, artificial intelligence, and computer engineering to analyze large amounts of data.', metadata={'start': 1, 'end': 2}),
 Document(page_content='This analysis helps data scientists to ask and answer questions like what happened, why it happened, what will happen, and what can be done with the results.', metadata={'start': 1, 'end': 2}),
 Document(page_content='Data science is important because it combines tools, methods, and technology to generate meaning from data. Modern organizations are inundated with data; there is a proliferation of devices that can automatically collect and store information.', metadata={'start': 3, 'end': 4}),
 Document(page_content='Online systems and payment portals capture more data in the fields of e-commerce, medicine, finance, and every ot