In [3]:
import json
import os
import re
from urllib.parse import parse_qs, urlparse

import google.generativeai as genai
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Configure the API key for Google Generative AI.
# The key is read from the GOOGLE_API_KEY environment variable so that no
# credential lives in the source file; export it before running this script.
# NOTE: the key that used to be hard-coded here has been exposed and should be
# revoked and replaced.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Helper to pull the video ID out of the common YouTube URL shapes
def extract_video_id(youtube_url):
    """Return the video ID contained in *youtube_url*, or None if none is found.

    Recognised inputs:
      * watch URLs with a ``v`` query parameter (extra parameters such as
        ``t`` or ``pp`` are ignored),
      * ``youtu.be/<id>`` short links,
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths,
      * a bare video ID (returned unchanged).
    """
    if not isinstance(youtube_url, str):
        return None
    url = youtube_url.strip()
    if not url:
        return None

    # urlparse only recognises the host when a scheme is present, so add one
    # to scheme-less links such as "youtu.be/<id>".
    candidate = url if "://" in url or "/" not in url else "https://" + url
    try:
        parsed = urlparse(candidate)
    except ValueError:
        return None

    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    segments = [seg for seg in parsed.path.split("/") if seg]
    host = (parsed.hostname or "").lower()
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Fallback for inputs that are not URLs (e.g. a bare video ID); anything
    # that still looks like a URL path is rejected.
    leftover = url.split("v=")[-1].split("&")[0]
    if not leftover or "/" in leftover:
        return None
    return leftover


# Function to get the transcript of the YouTube video
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video as plain text.

    Hindi is tried first, then English.

    Args:
        youtube_url: Link to the video (or a bare video ID).

    Returns:
        A ``(text, language)`` tuple where ``language`` is ``'hi'`` or
        ``'en'``. On any failure a JSON error object is printed and
        ``(None, None)`` is returned.
    """
    video_id = extract_video_id(youtube_url)
    if video_id is None:
        print(json.dumps({"error": "Error fetching transcript: could not extract a video ID from the URL."}))
        return None, None

    try:
        formatter = TextFormatter()
        for language in ("hi", "en"):
            try:
                transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
            except Exception:
                # This language is unavailable (or the request failed);
                # fall through to the next preferred language.
                continue
            return formatter.format_transcript(transcript), language
    except Exception as e:
        print(json.dumps({"error": f"Error fetching transcript: {str(e)}"}))
        return None, None

    print(json.dumps({"error": "Neither Hindi nor English transcript is available."}))
    return None, None

# Function to generate summary and quiz based on the transcript
def generate_summary_and_quiz(transcript, num_questions, language):
    """Ask the Gemini model for a summary and a multiple-choice quiz.

    Args:
        transcript: Plain-text transcript to summarise.
        num_questions: Number of quiz questions requested in the prompt.
        language: Transcript language code; accepted but not used here.

    Returns:
        The raw text of the model response.
    """
    request_text = f"""Summarize the following transcript in about 200 words.
    Then create a quiz with {num_questions} multiple-choice questions based on the content.
    The quiz questions and options should always be in English, regardless of the transcript language.
    Format the output as follows:
    Summary: [Your summary here]
    Quiz:
    Q1. [Question in English]
    A. [Option A in English]
    B. [Option B in English]
    C. [Option C in English]
    D. [Option D in English]
    Correct Answer: [Correct option letter]

    [Repeat for all questions]

    Transcript: {transcript}
    """

    # Send the prompt to Google's Generative AI model and hand back its text
    gemini = genai.GenerativeModel(model_name="gemini-1.5-pro")
    return gemini.generate_content(request_text).text

# Function to parse the generated content into summary and quiz format
def parse_content(content):
    """Split the model output into a summary string and a list of quiz items.

    Args:
        content: Text containing a ``Summary:`` section followed by ``Quiz:``
            and a series of ``Qn.`` questions, each terminated by a
            ``Correct Answer:`` line.

    Returns:
        A ``(summary, quiz)`` tuple. ``summary`` is ``""`` when no summary
        section is found. ``quiz`` is a list of dicts with the keys
        ``"question"``, ``"options"`` (up to four option lines) and
        ``"answer"`` (the option line selected by the answer letter).
        Questions whose answer letter is missing or does not match one of the
        parsed options are skipped instead of raising.
    """
    summary = ""
    quiz = []
    if not content:
        return summary, quiz

    # Extract summary
    summary_match = re.search(r"Summary:(.*?)Quiz:", content, re.DOTALL | re.IGNORECASE)
    if summary_match:
        summary = summary_match.group(1).strip()

    # Extract quiz questions. Each question is captured together with its own
    # answer line so one unparseable answer cannot shift the pairing of all
    # the questions that follow it.
    question_pattern = re.compile(
        r"Q\d+\.(.*?)Correct Answer:([^\n]*)", re.DOTALL | re.IGNORECASE
    )
    for body, answer_line in question_pattern.findall(content):
        # Drop blank lines so spacing in the output cannot displace options.
        lines = [line.strip() for line in body.splitlines() if line.strip()]
        # First letter on the answer line; tolerates decoration like "** B".
        letter = re.search(r"[A-Za-z]", answer_line)
        if not lines or letter is None:
            continue

        options = lines[1:5]
        index = ord(letter.group(0).upper()) - ord("A")
        if not 0 <= index < len(options):
            continue

        quiz.append({
            "question": lines[0],
            "options": options,
            "answer": options[index],
        })

    return summary, quiz

# Function to generate the final JSON output
def generate_json(youtube_link, num_questions):
    """Build the summary-and-quiz JSON document for a video.

    Args:
        youtube_link: Link passed on to ``get_transcript``.
        num_questions: Number of quiz questions to request.

    Returns:
        A JSON string with the keys ``"summary"`` and ``"quiz"``, or ``None``
        when no transcript could be obtained.
    """
    transcript, language = get_transcript(youtube_link)
    if transcript:
        generated = generate_summary_and_quiz(transcript, num_questions, language)
        summary, quiz = parse_content(generated)
        return json.dumps(
            {"summary": summary, "quiz": quiz},
            indent=2,
            ensure_ascii=False,
        )
    return None

# Main function to handle user input
def main():
    """Prompt for a video link and question count, then print the result."""
    youtube_link = input("Enter the YouTube video link: ")

    # Keep asking until the answer parses as an integer
    num_questions = None
    while num_questions is None:
        try:
            num_questions = int(input("Enter the number of quiz questions: "))
        except ValueError:
            print("Invalid input. Please enter a valid integer for the number of questions.")

    # Generate JSON content and report either the document or a failure
    json_output = generate_json(youtube_link, num_questions)
    if not json_output:
        print(json.dumps({"error": "Failed to generate content"}))
        return
    print("Generated Output:\n", json_output)

# Execute the main function only when this file is run as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()


Enter the YouTube video link: https://www.youtube.com/watch?v=CO4E_9V6li0&t=756s&pp=ygULbGxtIHByb2plY3Q%3D
Enter the number of quiz questions: 5
Generated Output:
 {
  "summary": "This transcript is a tutorial on building a cold email generator for software service companies using the Llama 3.1 language model. The tool automates the process of identifying potential clients through job postings, extracting relevant skills, and generating personalized cold emails with links to the company's portfolio. \n\nThe tutorial covers using various tools and technologies, including:\n\n* **Llama 3.1:** An open-source LLM used for text generation and information extraction.\n* **Langchain:** A framework for building applications with LLMs.\n* **Chroma DB:** A vector database for storing and querying information based on semantic meaning.\n* **Streamlit:** A framework for building interactive user interfaces.\n\nThe tutorial demonstrates how to scrape job postings from websites, extract relevant inf