In [57]:
import os, re, json, time
from urllib.parse import parse_qs, urlparse

from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one is present.
load_dotenv()
# The API key is read from the environment rather than hard-coded in the notebook;
# os.getenv returns None when GOOGLE_API_KEY is not set.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
# Model identifier handed to genai.GenerativeModel when a transcript is analyzed.
MODEL_NAME ="models/gemini-3-flash-preview"



In [58]:


def _extract_video_id(url):
    """Return the video ID found in a YouTube URL, or the input itself if it is a bare ID."""
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A non-empty YouTube URL or video ID is required.")
    candidate = url.strip()

    # Accept a bare 11-character video ID as well as a full URL.
    if re.fullmatch(r"[A-Za-z0-9_-]{11}", candidate):
        return candidate

    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(candidate if "://" in candidate else "https://" + candidate)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short links carry the ID as the first path segment.
        video_id = segments[0] if segments else ""
    else:
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            video_id = segments[1]

    if not video_id:
        raise ValueError("Could not extract a video ID from URL: " + url)
    return video_id


def _select_transcript(transcript_list, languages, fallback):
    """Pick a transcript in a requested language, else any available one (manual first)."""
    if languages:
        try:
            return transcript_list.find_transcript(languages)
        except Exception:
            # Best-effort: a failed language lookup is not fatal unless the
            # caller opted out of falling back to other languages.
            if not fallback:
                return None

    available = list(transcript_list)
    manual = [t for t in available if not getattr(t, "is_generated", False)]
    candidates = manual or available
    return candidates[0] if candidates else None


def _snippet_text(item):
    """Return the text of one transcript snippet (dict or object), or "" if it has none."""
    if isinstance(item, dict):
        text = item.get("text")
    else:
        text = getattr(item, "text", None)
    return text if isinstance(text, str) else ""


def get_transcript_with_pytube(url, languages=None, fallback=True):
    """
    Fetch the transcript of a YouTube video and return it as one string.

    Despite the name, this function does not use pytube; it relies on the
    instance-based API of youtube-transcript-api (``YouTubeTranscriptApi().list``).
    NOTE(review): that API presumably requires youtube-transcript-api >= 1.0 --
    confirm against the installed version.

    Args:
        url: A YouTube URL (watch, youtu.be, shorts, embed or live form) or a
            bare 11-character video ID.
        languages: Optional list of language codes to look for. When omitted,
            any available transcript is used.
        fallback: When True (default) and none of ``languages`` is available,
            fall back to any other transcript, preferring manually created
            ones over auto-generated ones. When False, a missing language is
            an error.

    Returns:
        The snippet texts joined by single spaces, with empty snippets skipped
        and surrounding whitespace stripped.

    Raises:
        ValueError: If no video ID can be extracted from ``url``.
        LookupError: If no suitable transcript exists for the video.
        Exceptions raised by youtube-transcript-api while listing or fetching
        transcripts are propagated unchanged.
    """
    video_id = _extract_video_id(url)

    transcript_list = YouTubeTranscriptApi().list(video_id)
    transcript = _select_transcript(transcript_list, languages, fallback)
    if transcript is None:
        raise LookupError("No transcript found for video_id=" + video_id)

    # Snippets may be plain dicts or objects exposing a ``text`` attribute.
    pieces = (_snippet_text(item) for item in transcript.fetch())
    return " ".join(piece for piece in pieces if piece).strip()



In [59]:
def _parse_json_object(text):
    """Parse model output as JSON, tolerating prose or code fences around the object."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Try every "{" as a possible start of the object; raw_decode stops at the
    # matching close, so braces in the surrounding prose cannot break parsing.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text, start)
            return parsed
        except json.JSONDecodeError:
            start = text.find("{", start + 1)

    raise ValueError("Could not parse JSON output.")


def analyze_transcript_with_gemini(transcript_text):
    """
    Ask the configured Gemini model to rate a video transcript.

    The transcript is embedded in a fixed evaluation prompt and sent to
    ``genai.GenerativeModel(MODEL_NAME)``; the reply is parsed as JSON.

    Args:
        transcript_text: The transcript to evaluate.

    Returns:
        The parsed JSON reply. The prompt asks for the keys ``clarity``,
        ``concept_depth``, ``engagement``, ``promotion``, ``summary`` and
        ``overall_rating``, but the reply is not validated against them.

    Raises:
        ValueError: If no JSON value can be extracted from the model output.
    """
    prompt = f"""
You are an expert educational evaluator.
Analyze this YouTube video transcript and rate it objectively.
The rating can be a floating integer, be as accurate as possible and be stringent with the rating.
Return ONLY valid JSON with fields:

{{
  "clarity": 1-10,
  "concept_depth": 1-10,
  "engagement": 1-10,
  "promotion": 1-10,
  "summary": "short summary",
  "overall_rating": 1-10
}}

Transcript:
\"\"\"{transcript_text}\"\"\"
    """

    model = genai.GenerativeModel(MODEL_NAME)
    response = model.generate_content(prompt)

    # Fall back to the response's string form when it exposes no usable text.
    text = getattr(response, "text", None)
    if not isinstance(text, str):
        text = str(response)

    return _parse_json_object(text)


In [60]:
# Video to evaluate; replace the link to analyze a different video.
url = "https://youtu.be/riXcZT2ICjA?si=6NlTuPR0wYD8fzl0"   # enter the YouTube video link here
print(" Fetching transcript...")
# Ask for a transcript using the 'en' and 'hi' language codes.
transcript = get_transcript_with_pytube(url, languages=['en','hi'])
print(f"Transcript fetched ({len(transcript)} characters)\n")

print("Analyzing with Gemini...")
# Send the transcript text to the model and receive the parsed JSON ratings.
result = analyze_transcript_with_gemini(transcript)
# ensure_ascii=False prints non-ASCII characters as-is instead of \u escapes.
print(json.dumps(result, indent=2, ensure_ascii=False))


 Fetching transcript...
Transcript fetched (9467 characters)

Analyzing with Gemini...
{
  "clarity": 9.5,
  "concept_depth": 6.0,
  "engagement": 8.5,
  "promotion": 1.0,
  "summary": "An intuitive introduction to the concept of limits using two primary examples: a rational function with a removable discontinuity and a piecewise function. The instructor effectively distinguishes between the value of a function at a point and the value the function approaches as it nears that point, supplemented by a numerical demonstration using a calculator.",
  "overall_rating": 8.7
}
