In [1]:
import os
import pandas as pd
import numpy as np
import faiss
import json
import google.generativeai as genai

# ============================================================
# 1️⃣ Build FAISS Index if Missing
# ============================================================
def _parse_embedding(raw):
    """Convert one stored embedding cell into a 1-D float32 vector.

    Accepts a JSON array (``"[0.1, 0.2]"``) or a bracketed list of numbers
    separated by commas and/or whitespace (``"[0.1 0.2]"``).

    Returns:
        A non-empty 1-D ``float32`` NumPy array, or ``None`` when the cell is
        empty or cannot be interpreted as a flat list of numbers.
    """
    text = str(raw).strip()
    try:
        values = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Not valid JSON: fall back to splitting the bracketed text manually.
        tokens = text.strip("[]").replace(",", " ").split()
        try:
            values = [float(token) for token in tokens]
        except ValueError:
            return None

    try:
        vector = np.asarray(values, dtype="float32")
    except (TypeError, ValueError):
        # e.g. a JSON object or a ragged/nested structure.
        return None

    if vector.ndim != 1 or vector.size == 0:
        return None
    return vector


def build_faiss_index_if_missing(csv_path, index_path):
    """
    Builds and saves a FAISS index from CSV embeddings if it doesn't exist.

    Args:
        csv_path: Path to a CSV file with a ``text_embedding`` column holding
            one serialized embedding vector per row.
        index_path: Where the FAISS index is (or will be) stored. If a file
            already exists at this path, nothing is rebuilt.

    Raises:
        ValueError: If the CSV has no rows, if any row has a missing or
            malformed embedding, or if the embeddings do not all share the
            same length. Nothing is written to ``index_path`` in that case.
    """
    if os.path.exists(index_path):
        print("‚úÖ FAISS index found. Skipping rebuild.")
        return

    print("‚öôÔ∏è Building FAISS index...")
    df = pd.read_csv(csv_path)
    df["text_embedding"] = df["text_embedding"].fillna("[]")

    vectors = []
    unusable_rows = []
    for row_number, cell in enumerate(df["text_embedding"]):
        vector = _parse_embedding(cell)
        if vector is None:
            unusable_rows.append(row_number)
        else:
            vectors.append(vector)

    if not vectors and not unusable_rows:
        raise ValueError(
            f"Cannot build FAISS index from {csv_path!r}: the CSV has no rows."
        )
    if unusable_rows:
        # Fail loudly instead of skipping: vectors are added in CSV order, so
        # dropping a row would shift every later vector's position in the
        # index relative to its row in the CSV.
        raise ValueError(
            f"Cannot build FAISS index from {csv_path!r}: 'text_embedding' is "
            f"missing or malformed in {len(unusable_rows)} row(s) "
            f"(first offending row numbers: {unusable_rows[:5]})."
        )

    lengths = {vector.shape[0] for vector in vectors}
    if len(lengths) != 1:
        raise ValueError(
            f"Cannot build FAISS index from {csv_path!r}: 'text_embedding' "
            f"vectors have inconsistent lengths {sorted(lengths)}."
        )

    embeddings = np.vstack(vectors)

    # Create FAISS index
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Save the index
    faiss.write_index(index, index_path)
    print(f"‚úÖ FAISS index created and saved as: {index_path}")


# ============================================================
# 2️⃣ Load FAISS Index and Metadata
# ============================================================
def load_faiss_index(csv_path, index_path):
    """
    Load the metadata CSV together with the saved FAISS index.

    Missing values in the ``transcript`` column are replaced with empty
    strings.

    Returns:
        A ``(DataFrame, index)`` tuple: the CSV contents and the object
        returned by ``faiss.read_index(index_path)``.
    """
    metadata = pd.read_csv(csv_path)
    transcripts = metadata["transcript"]
    metadata["transcript"] = transcripts.fillna("")
    return metadata, faiss.read_index(index_path)


# ============================================================
# 3️⃣ Retrieve Transcript from FAISS DB using Video ID
# ============================================================
def get_transcript_from_faiss(video_id, df):
    """
    Look up the transcript stored for ``video_id`` in the metadata frame.

    Returns:
        The ``transcript`` value of the first row whose ``video_id`` column
        equals ``video_id``, or ``None`` when no row matches.
    """
    is_match = df["video_id"] == video_id
    if not is_match.any():
        return None
    # Several rows may share the same id; the first one wins.
    return df.loc[is_match, "transcript"].iloc[0]


# ============================================================
# 4️⃣ Summarize using Google Gemini API
# ============================================================
def summarize_with_gemini(transcript, api_key):
    """
    Ask the Gemini API for a structured, concise summary of ``transcript``.

    Returns:
        The stripped text of the model response. If anything raises along the
        way, a human-readable error string is returned instead of propagating
        the exception.
    """
    try:
        genai.configure(api_key=api_key)
        request_text = f"Summarize the following YouTube transcript in a structured, concise format:\n\n{transcript}"
        # Model name is fixed here; every call uses "gemini-2.0-flash".
        reply = genai.GenerativeModel("gemini-2.0-flash").generate_content(request_text)
        summary_text = reply.text
        return summary_text.strip()
    except Exception as err:
        return f"‚ö†Ô∏è Error using Gemini API: {err}"


# ============================================================
# 5️⃣ Main Summarizer Function
# ============================================================
def summarize_video(video_id, csv_path, index_path, api_key):
    """
    Produce a Gemini summary for the transcript stored under ``video_id``.

    Makes sure the FAISS index file exists (building it if needed), loads the
    metadata CSV and the index, looks the transcript up by video id, and sends
    it to Gemini.

    Returns:
        The summary text (or Gemini error string), or a "no transcript"
        message when the id is unknown or its transcript is empty.
    """
    build_faiss_index_if_missing(csv_path, index_path)
    metadata, _index = load_faiss_index(csv_path, index_path)

    text = get_transcript_from_faiss(video_id, metadata)
    if text:
        return summarize_with_gemini(text, api_key)
    return "‚ùå No transcript found for this video ID."


# ============================================================
# 6️⃣ Main Script (User Input)
# ============================================================
if __name__ == "__main__":
    from getpass import getpass

    csv_path = "youtube_details_with_embeddings.csv"
    index_path = "youtube_faiss.index"

    # SECURITY: the API key used to be hard-coded on this line. A key that has
    # been committed to source control should be treated as leaked and revoked.
    # It is now taken from the GEMINI_API_KEY environment variable, with a
    # hidden interactive prompt as a fallback.
    GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
    if not GEMINI_API_KEY:
        GEMINI_API_KEY = getpass("Enter Gemini API key: ").strip()
    if not GEMINI_API_KEY:
        raise SystemExit("No Gemini API key provided (set GEMINI_API_KEY).")

    video_id = input("üé• Enter YouTube Video ID: ").strip()

    summary = summarize_video(video_id, csv_path, index_path, GEMINI_API_KEY)

    print("\nüé¨ ====== VIDEO SUMMARY ======\n")
    print(summary)


üé• Enter YouTube Video ID:  t8txtQkhMcY


‚úÖ FAISS index found. Skipping rebuild.


Okay, here's a concise summary of the YouTube transcript provided:

**Topic:** Mount Kailash in Tibet

**Main Point:** Scientists cannot explain what is happening on Mount Kailash, which has never been conquered.
