In [None]:
import os
import re
from typing import List, Dict, Any
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
import torch
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer, BartTokenizer, BartForConditionalGeneration

def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms:
      * watch URLs:   ``youtube.com/watch?v=<id>``
      * path URLs:    ``youtube.com/embed/<id>``, ``/shorts/<id>``,
                      ``/live/<id>``, ``/v/<id>``
      * short links:  ``youtu.be/<id>``

    The host is compared case-insensitively, a leading ``www.`` and any port
    are ignored, and the ``m.`` / ``music.`` subdomains are accepted.
    URLs pasted without a scheme (``youtu.be/<id>``) are handled as well.

    Args:
        youtube_url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when the URL is empty, is not
        a YouTube URL, or carries no video ID.
    """
    if not youtube_url:
        return None

    url = youtube_url.strip()
    parsed_url = urlparse(url)
    if not parsed_url.scheme and not parsed_url.netloc:
        # Without a scheme urlparse puts the host into the path; a leading
        # "//" makes it parse the host as the network location instead.
        parsed_url = urlparse("//" + url)

    # .hostname is already lower-cased and has the port stripped.
    host = parsed_url.hostname or ""
    if host.startswith("www."):
        host = host[len("www."):]

    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be":
        # Only the first path segment is the ID; ignore anything after it.
        return segments[0] if segments else None

    if host in ("youtube.com", "m.youtube.com", "music.youtube.com"):
        from_query = parse_qs(parsed_url.query).get("v", [None])[0]
        if from_query:
            return from_query
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None

def get_youtube_subtitles(video_url, language="en"):
    """Download the subtitles of a YouTube video.

    Args:
        video_url: URL of the video; its ID is resolved with ``get_video_id``.
        language: Language code requested from the transcript API.

    Returns:
        On success, a dict with three keys:
          * ``"text"``: all subtitle lines joined with single spaces,
          * ``"with_timestamps"``: one ``"<start>s: <line>"`` row per entry,
          * ``"video_id"``: the resolved video ID.
        On failure, a plain ``str`` holding a user-facing error message
        (either the ID could not be extracted or the fetch raised).
    """
    resolved_id = get_video_id(video_url)
    if not resolved_id:
        return "Không thể trích xuất ID video từ URL."

    try:
        entries = YouTubeTranscriptApi.get_transcript(resolved_id, languages=[language])
        plain = " ".join(entry['text'] for entry in entries)
        stamped = "\n".join(
            f"{entry['start']:.2f}s: {entry['text']}" for entry in entries
        )
    except Exception as e:
        # Any failure while fetching or formatting is reported as a message.
        return f"Lỗi khi lấy phụ đề: {e}"
    else:
        result = {}
        result["text"] = plain
        result["with_timestamps"] = stamped
        result["video_id"] = resolved_id
        return result

def preprocess_text(text):
    """Clean raw subtitle text.

    Removes square-bracketed spans that sit on a single line (e.g.
    ``[Music]``), collapses every run of whitespace into one space, and
    trims leading/trailing whitespace.

    Args:
        text: The raw subtitle string.

    Returns:
        The cleaned string.
    """
    without_brackets = re.sub(r'\[.*?\]', '', text)
    single_spaced = re.sub(r'\s+', ' ', without_brackets)
    return single_spaced.strip()

def create_vector_store(text):
    """Chunk ``text``, embed the chunks and index them in a FAISS store.

    The text is split with ``RecursiveCharacterTextSplitter``
    (``chunk_size=1000``, ``chunk_overlap=100``), blank chunks are dropped,
    and the remainder is embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model in batches of 32.

    Args:
        text: The (pre-processed) subtitle text to index.

    Returns:
        The FAISS vector store built from the chunks.
    """
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=1000,
        chunk_overlap=100,
    )

    # Keep only chunks that still have content after trimming.
    chunks = []
    for piece in splitter.split_text(text):
        trimmed = piece.strip()
        if trimmed:
            chunks.append(trimmed)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        encode_kwargs={'batch_size': 32},
    )
    return FAISS.from_texts(chunks, embedder)

def generate_summary(text, max_length=150):
    """Summarise ``text`` chunk by chunk with a BART summarisation model.

    The text is cut into consecutive 1024-character slices, each slice is
    summarised independently with ``philschmid/bart-large-cnn-samsum``
    (beam search, 4 beams), and the per-slice summaries are joined with
    spaces.

    Args:
        text: The text to summarise.
        max_length: Upper bound, in tokens, on the summary generated for
            EACH slice (not on the combined result).

    Returns:
        The concatenated slice summaries, or ``""`` when ``text`` is empty
        or whitespace-only.
    """
    # Nothing to summarise: avoid loading the model for no work.
    if not text or not text.strip():
        return ""

    model_name = "philschmid/bart-large-cnn-samsum"

    tokenizer = BartTokenizer.from_pretrained(model_name)
    model = BartForConditionalGeneration.from_pretrained(model_name)
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # Slices are measured in characters, so each one stays well inside the
    # 1024-token truncation limit applied by the tokenizer call below.
    chunk_size = 1024
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # Keep the lower bound strictly below the caller's cap so a small
    # ``max_length`` does not conflict with the minimum length.
    min_length = min(50, max(0, max_length - 1))

    intermediate_summaries = []
    for chunk in chunks:
        inputs = tokenizer(
            chunk, return_tensors="pt", max_length=1024, truncation=True
        ).to(device)
        # Unpack the whole encoding so the attention mask is forwarded
        # together with the input ids.
        summary_ids = model.generate(
            **inputs,
            max_length=max_length,
            min_length=min_length,
            num_beams=4,
            early_stopping=True,
        )
        intermediate_summaries.append(
            tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        )

    return " ".join(intermediate_summaries)

def setup_llm():
    """Load ``microsoft/phi-2`` and wrap it as a LangChain LLM.

    The model is loaded in float16 with ``device_map="auto"`` and exposed
    through a sampling text-generation pipeline (up to 512 new tokens,
    temperature 0.7, top-p 0.95, repetition penalty 1.2).

    Returns:
        A ``HuggingFacePipeline`` wrapping the text-generation pipeline.
    """
    model_name = "microsoft/phi-2"
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        top_p=0.95,
        repetition_penalty=1.2,
        # By default the pipeline returns prompt + completion, which made the
        # chat print the retrieval prompt and context back instead of an answer.
        # NOTE(review): if the installed HuggingFacePipeline wrapper already
        # strips the prompt itself, this flag must be removed again.
        return_full_text=False,
        # Set the padding token explicitly so generation does not have to
        # fall back to one (and warn about it) on every call.
        pad_token_id=tokenizer.eos_token_id,
    )

    llm = HuggingFacePipeline(pipeline=pipe)
    return llm

def create_chat_chain(vector_store, llm):
    """Create the retrieval question-answering chain for the chat loop.

    Args:
        vector_store: Store whose retriever supplies context; it is asked
            for the 5 best matches per query (``k=5``).
        llm: The language model that produces the answers.

    Returns:
        A ``ConversationalRetrievalChain`` built without a memory object and
        without source documents in its output, so the caller has to pass
        ``chat_history`` on each call.
    """
    top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=top_k_retriever,
        memory=None,
        return_source_documents=False,
    )

def youtube_video_chat_app():
    """Run the interactive console app: summarise a video, then chat about it.

    Steps:
      1. Ask for a YouTube URL and download its subtitles.
      2. Clean the subtitles and index them in a vector store.
      3. Print a summary of the subtitles.
      4. Load the chat model and answer questions in a loop until the user
         types ``exit``.

    Returns:
        None. Returns early, after printing the message, when the subtitles
        cannot be obtained.
    """
    print("=== YouTube Video Chat App ===")
    # Pasted URLs often carry stray surrounding whitespace.
    video_url = input("Nhập link video YouTube: ").strip()

    print("Đang lấy phụ đề video...")
    subtitles_data = get_youtube_subtitles(video_url)

    # get_youtube_subtitles reports EVERY failure as a plain string, and not
    # all of those messages contain the word "Lỗi"; checking the type alone
    # keeps an invalid URL from crashing on the dict lookup below.
    if isinstance(subtitles_data, str):
        print(subtitles_data)
        return

    processed_text = preprocess_text(subtitles_data["text"])
    print("Đang tạo vector store...")
    vector_store = create_vector_store(processed_text)

    print("Đang tạo tóm tắt nội dung video...")
    summary = generate_summary(processed_text)
    print("\n=== TÓM TẮT NỘI DUNG VIDEO ===")
    print(summary)
    print("===============================\n")

    print("Đang khởi tạo mô hình chat...")
    llm = setup_llm()
    chain = create_chat_chain(vector_store, llm)

    print("\nBạn có thể bắt đầu chat về nội dung video. Gõ 'exit' để thoát.")

    while True:
        query = input("\nBạn: ").strip()
        if query.lower() == 'exit':
            break
        if not query:
            # Do not spend a model call on an empty line.
            continue

        formatted_query = f"""
        Based on the YouTube video subtitles, please answer the following question:
        
        Question: {query}
        
        Only respond using the information found in the video subtitles. If the information is not mentioned, state that it is not available in the video.
        """

        # The chain has no memory attached, so the history is passed
        # explicitly; each question is answered independently.
        # ``invoke`` replaces the deprecated ``chain(...)`` call form.
        response = chain.invoke({"question": formatted_query, "chat_history": []})
        print(f"\nAssistant: {response['answer']}")

        # https://www.youtube.com/watch?v=Up6tk1hliIM&t=113s
        # What happened to David James and Corey after the shooting incident?
# Start the interactive chat app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    youtube_video_chat_app()

2025-03-13 08:26:40.465126: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-13 08:26:40.465157: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-13 08:26:40.465942: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-13 08:26:40.471187: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


=== YouTube Video Chat App ===


Nhập link video YouTube:  https://www.youtube.com/watch?v=Up6tk1hliIM&t=113s


Đang lấy phụ đề video...
Đang tạo vector store...


  embeddings = HuggingFaceEmbeddings(


Đang tạo tóm tắt nội dung video...

=== TÓM TẮT NỘI DUNG VIDEO ===
The Assassins bullet came within a quarter of an inch of taking my life. It was a warm beautiful day in the early evening in Butler Township in the great Commonwealth of Pennsylvania. The campaign was doing well and the crowd was cheering wildly. I was discussing the great job my Administration did on immigration at the southern border behind me and to the right was a screen with a chart of border crossings under my leadership. There was an attack on the concert. The Secret Service agents rushed to the stage and pounced on top of the singer to protect him. He was hit by a bullet on his right ear, but managed to move his head at the last moment to avoid the bullet hitting his Mark. The crowd stood by and didn't move even though a sniper fired a bullet into the crowd. Many of them immediately stood up and pointed at the sniper. They knew it was a shot to the head and they saw the blood. The crowd didn't want to leave Sere

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  llm = HuggingFacePipeline(pipeline=pipe)



Bạn có thể bắt đầu chat về nội dung video. Gõ 'exit' để thoát.



Bạn:  What happened to David James and Corey after the shooting incident?


  response = chain({"question": formatted_query, "chat_history": []})
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Assistant: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

million for the families of David James and Corey including from a friend of mine just called up he sent me a check right here I just got it $1 million from Dan Newan thank you Dan and again when speaking to the family I told them I said well I'm going to be sending you a lot of money but it can't compensate they all said the same thing you're right sir we appreciate so much what you're doing but nothing can take the place in the case of Corey and the other two by the way they were very very seriously injured but now they're doing very well they're going to be okay they're going to be doing very well the Warriors so now I ask that we observe a moment of silence in honor of our friend Corey visit our site englishspeech channel.com for exclusive access to video transcripts offline audio English lessons and private 


Bạn:  what is one plus one?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Assistant: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

million for the families of David James and Corey including from a friend of mine just called up he sent me a check right here I just got it $1 million from Dan Newan thank you Dan and again when speaking to the family I told them I said well I'm going to be sending you a lot of money but it can't compensate they all said the same thing you're right sir we appreciate so much what you're doing but nothing can take the place in the case of Corey and the other two by the way they were very very seriously injured but now they're doing very well they're going to be okay they're going to be doing very well the Warriors so now I ask that we observe a moment of silence in honor of our friend Corey visit our site englishspeech channel.com for exclusive access to video transcripts offline audio English lessons and private 


Bạn:  how crowd act after th shoot?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Assistant: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

thing is that prior to the shot if I had not moved my head at that very last instant the Assassins bullet would have perfectly hit its Mark and I would not be here tonight we would not be together the most incredible aspect of what took place on that terrible evening in the fading sun was actually seen later in almost all cases as you probably know and when even a single bullet is fired just a single bullet and we had many bullets that were being fired crowds Run for the exits or Stampede but not in this case it's very unusual this massive crowd of tens of thousands of people stood by and didn't move an inch in fact many of them bravely but automatically stood up looking for where the sniper would be they knew immediately it was a sniper and then began pointing at him you can see that if you look at the group beh


Bạn:  To solve this exercise, carefully read through the provided transcript excerpt containing contextual clues related to the event described in the YouTube video. The goal is to extract relevant information about what occurred to David James and Corey after the shooting incident.   In the given context, it states that "prior to the shot," the narrator mentions their potential survival if they hadn't moved their heads slightly during the final moments of the attack. This suggests that they narrowly avoided severe injury or death due to the bullet hitting another part of their body instead. Additionally, the mention of the crowd standing motionless indicates that they reacted calmly despite witnessing the tragic scene unfold, potentially contributing to saving more lives. However, the focus shifts towards honoring the fallen heroes and raising funds for their families shortly afterward.  Therefore, based solely on the information presented within the video subtitle excerpts, we can c

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Assistant: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

thing is that prior to the shot if I had not moved my head at that very last instant the Assassins bullet would have perfectly hit its Mark and I would not be here tonight we would not be together the most incredible aspect of what took place on that terrible evening in the fading sun was actually seen later in almost all cases as you probably know and when even a single bullet is fired just a single bullet and we had many bullets that were being fired crowds Run for the exits or Stampede but not in this case it's very unusual this massive crowd of tens of thousands of people stood by and didn't move an inch in fact many of them bravely but automatically stood up looking for where the sniper would be they knew immediately it was a sniper and then began pointing at him you can see that if you look at the group beh