<a href="https://colab.research.google.com/github/Joshika-Mentor/AI-Query-Tube/blob/Ruthika/final_output.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio pandas numpy google-api-python-client youtube-transcript-api sentence-transformers scikit-learn


**Module 8: Final Deployment & Search Interface**

In [None]:
import os
from getpass import getpass

import gradio as gr
import numpy as np
import pandas as pd
from googleapiclient.discovery import build
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from youtube_transcript_api import YouTubeTranscriptApi


In [None]:
# SECURITY: never commit API keys to a notebook. The key is read from the
# YOUTUBE_API_KEY environment variable; when that is unset or empty the user is
# prompted for it without echoing the input. Any key that was previously
# committed in this cell should be revoked and replaced.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or getpass("YouTube Data API key: ")

# Client for the YouTube Data API v3, shared by the search helper.
youtube = build("youtube", "v3", developerKey=API_KEY)

# Sentence-embedding model used to encode both the video texts and the query.
model = SentenceTransformer("all-MiniLM-L6-v2")


In [None]:
def get_videos(search_query, max_results=10):
    """Search YouTube for videos matching a keyword query.

    Args:
        search_query: Keyword query sent to the YouTube Data API ``search.list``
            endpoint through the module-level ``youtube`` client.
        max_results: Number of results to request. The value is coerced to
            ``int`` and clamped to the 0-50 range before it is sent.

    Returns:
        A ``(video_ids, titles)`` tuple of two parallel lists. Both lists are
        empty when the response contains no items.
    """
    # UI widgets such as sliders may hand over floats, and the API only accepts
    # integers between 0 and 50 for maxResults.
    limit = max(0, min(int(max_results), 50))

    response = youtube.search().list(
        q=search_query,
        part="id,snippet",
        type="video",
        maxResults=limit,
    ).execute()

    video_ids = []
    titles = []

    for item in response.get("items", []):
        video_id = item.get("id", {}).get("videoId")
        if not video_id:
            # Skip anything that is not a plain video result so the two lists
            # stay aligned.
            continue
        video_ids.append(video_id)
        titles.append(item.get("snippet", {}).get("title", ""))

    return video_ids, titles


In [None]:
def _fetch_transcript_text(video_id):
    """Return the transcript of one video as a single space-joined string."""
    # Older youtube-transcript-api releases expose a static ``get_transcript``
    # that returns a list of dicts; newer releases dropped it in favour of an
    # instance-level ``fetch`` whose snippets carry a ``text`` attribute.
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment["text"] for segment in segments)

    fetched = YouTubeTranscriptApi().fetch(video_id)
    return " ".join(snippet.text for snippet in fetched)


def get_transcripts(video_ids):
    """Fetch the transcript text for each video, best effort.

    Args:
        video_ids: Iterable of YouTube video ids.

    Returns:
        A list of transcript strings in the same order as ``video_ids``. A
        video whose transcript cannot be retrieved contributes an empty string,
        so the result always lines up with the input.
    """
    transcripts = []
    for vid in video_ids:
        try:
            transcripts.append(_fetch_transcript_text(vid))
        except Exception:
            # Deliberate best effort: a missing or disabled transcript must not
            # abort the whole search. Catching Exception (not a bare except)
            # still lets KeyboardInterrupt and SystemExit propagate.
            transcripts.append("")
    return transcripts


In [None]:
def gradio_semantic_search(youtube_query, user_query, max_results):
    """Run a keyword search on YouTube and re-rank the hits semantically.

    Args:
        youtube_query: Keyword query forwarded to ``get_videos``.
        user_query: Free-text query; results are ranked by cosine similarity
            between its embedding and the embedding of each video's
            title plus transcript.
        max_results: Number of videos to request from ``get_videos``.

    Returns:
        A Markdown string with one section per video (title, score, watch link
        and thumbnail), ordered from highest to lowest score. When the search
        returns no videos, a short Markdown notice is returned instead.
    """
    video_ids, titles = get_videos(youtube_query, max_results)
    if not video_ids:
        # Nothing to embed or rank; stacking an empty embedding list would raise.
        return "No videos found for this YouTube search query."

    transcripts = get_transcripts(video_ids)

    df = pd.DataFrame({
        "video_id": video_ids,
        "title": titles,
        "transcript": transcripts
    })
    df["combined_text"] = df["title"] + " " + df["transcript"]

    # Keep the embeddings as one 2-D matrix instead of round-tripping them
    # through a DataFrame column.
    doc_embeddings = np.asarray(model.encode(df["combined_text"].tolist()))
    query_embedding = np.asarray(model.encode(user_query)).reshape(1, -1)

    df["score"] = cosine_similarity(query_embedding, doc_embeddings)[0]
    df = df.sort_values(by="score", ascending=False)

    # Format output as Markdown, one section per video.
    sections = []
    for row in df.itertuples(index=False):
        video_url = f"https://www.youtube.com/watch?v={row.video_id}"
        thumbnail = f"https://img.youtube.com/vi/{row.video_id}/0.jpg"
        sections.append(
            f"\n### {row.title}\n"
            f"**Semantic Score:** {row.score:.3f}\n"
            f"[▶️ Watch on YouTube]({video_url})\n"
            "\n"
            f"![Thumbnail]({thumbnail})\n"
            "\n"
            "---\n"
        )
    return "".join(sections)


In [None]:
# Search UI: two text inputs, a result-count slider, and a Markdown pane that
# shows whatever gradio_semantic_search returns.
with gr.Blocks() as demo:
    gr.Markdown("# 🌐 Global YouTube Semantic Search (AI QueryTube)")

    # Keyword query used to fetch candidate videos from YouTube.
    yt_query = gr.Textbox(
        label="YouTube Search Query (keyword search)",
        placeholder="e.g. python tutorials"
    )

    # Free-text query used to re-rank the candidates by meaning.
    user_query = gr.Textbox(
        label="Semantic Query (meaning-based)",
        placeholder="e.g. beginner python basics"
    )

    max_results = gr.Slider(
        minimum=4, maximum=15, value=10, step=1,
        label="Number of YouTube videos to analyze"
    )

    search_btn = gr.Button("Search")

    output = gr.Markdown()

    # The inputs are passed positionally, in the order of the function's
    # parameters: (youtube_query, user_query, max_results).
    search_btn.click(
        fn=gradio_semantic_search,
        inputs=[yt_query, user_query, max_results],
        outputs=output
    )

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)
