In [13]:
import json
import re
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from langchain_groq import ChatGroq

# Groq AI API client.
# SECURITY: the API key is intentionally not written in source. When no key is
# passed, ChatGroq picks it up from the GROQ_API_KEY environment variable, so
# export that variable before running. Any key that was previously committed
# in this file should be treated as leaked and revoked.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,  # 0 keeps the generated quiz format as repeatable as possible
)

# Matches the 11-character video ID in the usual YouTube link shapes:
# watch?v=<id>, youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>.
# The negative lookahead stops a longer token from being truncated to 11 chars.
_VIDEO_ID_IN_URL = re.compile(
    r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
    r"([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])"
)
_BARE_VIDEO_ID = re.compile(r"[A-Za-z0-9_-]{11}")


def _extract_video_id(youtube_url):
    """Return the video ID found in *youtube_url*, or None when there is none."""
    if not isinstance(youtube_url, str):
        return None
    candidate = youtube_url.strip()
    # A bare ID (no URL around it) is accepted as-is.
    if _BARE_VIDEO_ID.fullmatch(candidate):
        return candidate
    found = _VIDEO_ID_IN_URL.search(candidate)
    return found.group(1) if found else None


# Function to get the transcript of the YouTube video
def get_transcript(youtube_url):
    """Fetch a video's transcript as plain text, preferring Hindi over English.

    Args:
        youtube_url: A YouTube link (watch, youtu.be, embed, shorts, live or
            /v/ form, with or without extra query parameters) or a bare
            11-character video ID.

    Returns:
        A ``(text, language)`` tuple, where ``language`` is ``'hi'`` or
        ``'en'``. Returns ``(None, None)`` when no video ID can be found, when
        neither transcript can be fetched, or when formatting fails; in those
        cases a JSON object with an ``"error"`` key is printed to stdout.
    """
    video_id = _extract_video_id(youtube_url)
    if video_id is None:
        print(json.dumps({"error": "Error fetching transcript: no video ID found in the given link."}))
        return None, None

    try:
        formatter = TextFormatter()

        # Try Hindi first, then fall back to English.
        for language in ("hi", "en"):
            try:
                transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
            except Exception:
                # Best effort: any failure for this language moves on to the
                # next one. Unlike a bare ``except``, Ctrl-C still propagates.
                continue
            return formatter.format_transcript(transcript), language

        print(json.dumps({"error": "Neither Hindi nor English transcript is available."}))
        return None, None
    except Exception as e:
        print(json.dumps({"error": f"Error fetching transcript: {str(e)}"}))
        return None, None

# Function to generate summary and quiz based on the transcript
def generate_summary_and_quiz(transcript, num_questions, language):
    """Ask the LLM for a summary of the transcript plus a multiple-choice quiz.

    Args:
        transcript: Transcript text that is embedded at the end of the prompt.
        num_questions: Number of quiz questions requested in the prompt.
        language: Accepted for the caller's convenience; it is not used when
            building the prompt.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    # The four-space indent inside the strings is part of the prompt text that
    # is sent to the model, so it is kept exactly as it is.
    prompt_lines = [
        "Summarize the following transcript in about 200 words.",
        f"    Then create a quiz with {num_questions} multiple-choice questions based on the content.",
        "    The quiz questions and options should always be in English, regardless of the transcript language.",
        "    Format the output as follows:",
        "    Summary: [Your summary here]",
        "    Quiz:",
        "    Q1. [Question in English]",
        "    A. [Option A in English]",
        "    B. [Option B in English]",
        "    C. [Option C in English]",
        "    D. [Option D in English]",
        "    Correct Answer: [Correct option letter]",
        "",
        "    [Repeat for all questions]",
        "",
        f"    Transcript: {transcript}",
        "    ",
    ]
    llm_reply = llm.invoke("\n".join(prompt_lines))
    return llm_reply.content

# Function to parse the generated content into summary and quiz format
def parse_content(content):
    """Extract the quiz questions from the model's formatted reply.

    Each question is expected to look like::

        Q1. <question text>
        A. <option>
        B. <option>
        C. <option>
        D. <option>
        Correct Answer: <letter>

    Args:
        content: The raw text produced by the model. ``None`` or an empty
            string yields an empty quiz.

    Returns:
        A list of dicts with the keys ``"question"`` (first line of the
        question block), ``"options"`` (up to four option lines, including
        their ``A.``-style prefixes) and ``"answer"`` (the full text of the
        option selected by the answer letter). Entries whose answer letter
        does not correspond to one of the parsed options are left out.
    """
    # A single pattern captures each question body together with its own
    # answer letter, so a question can never be paired with the answer of a
    # different question. ``[\s*]*`` also tolerates Markdown bold markers such
    # as ``**Correct Answer:** B``.
    entry_pattern = re.compile(
        r"Q\d+\.(.*?)Correct Answer:[\s*]*(\w)",
        re.DOTALL | re.IGNORECASE,
    )

    quiz = []
    for entry in entry_pattern.finditer(content or ""):
        body, letter = entry.groups()

        # Drop blank lines and lines made only of ``*`` so that spacing or
        # stray bold markers do not end up as the question or as options.
        stripped = (line.strip() for line in body.split('\n'))
        lines = [line for line in stripped if line.strip('*')]
        if not lines:
            continue

        options = lines[1:5]
        # find() gives -1 for anything that is not A-D, so digits, underscores
        # and other letters are rejected instead of indexing out of range.
        position = "ABCD".find(letter.upper())
        if position == -1 or position >= len(options):
            continue

        quiz.append({
            "question": lines[0],
            "options": options,
            "answer": options[position],
        })

    return quiz

# Function to generate the final JSON output
def generate_json(youtube_link, num_questions):
    """Run the transcript -> LLM -> parser pipeline and serialize the quiz.

    Args:
        youtube_link: Link passed straight to ``get_transcript``.
        num_questions: Number of questions passed to
            ``generate_summary_and_quiz``.

    Returns:
        A JSON string of the form ``{"quiz": [...]}`` (2-space indent,
        non-ASCII characters kept as-is), or ``None`` when
        ``get_transcript`` yields no transcript text.
    """
    transcript_text, transcript_lang = get_transcript(youtube_link)
    if transcript_text:
        llm_output = generate_summary_and_quiz(transcript_text, num_questions, transcript_lang)
        payload = {"quiz": parse_content(llm_output)}
        return json.dumps(payload, indent=2, ensure_ascii=False)

    # No transcript available: signal failure to the caller.
    return None

# Main function to handle user input
def main():
    """Prompt for a video link and a question count, then print the quiz.

    The question count is re-requested until a positive integer is entered.
    On success the generated JSON is printed; otherwise a JSON object with an
    ``"error"`` key is printed.
    """
    # Pasted links often carry stray surrounding whitespace.
    youtube_link = input("Enter the YouTube video link: ").strip()

    # Ask user for the number of quiz questions and validate it
    while True:
        try:
            num_questions = int(input("Enter the number of quiz questions: "))
        except ValueError:
            print("Invalid input. Please enter a valid integer for the number of questions.")
            continue
        # Zero or a negative count would produce a meaningless prompt.
        if num_questions > 0:
            break
        print("Invalid input. The number of questions must be at least 1.")

    # Generate JSON content
    json_output = generate_json(youtube_link, num_questions)

    if json_output:
        print("Generated Quiz:\n", json_output)
    else:
        print(json.dumps({"error": "Failed to generate content"}))

# Start the interactive prompt only when this file is executed directly;
# importing it as a module does not call main().
if __name__ == "__main__":
    main()

Enter the YouTube video link: https://www.youtube.com/watch?v=CO4E_9V6li0&t=786s
Enter the number of quiz questions: 2
Generated Quiz:
 {
  "quiz": [
    {
      "question": "What is the main purpose of the Cold Email Generator project?",
      "options": [
        "A. To generate cold emails to potential clients for software services companies.",
        "B. To extract job skills and descriptions from job postings.",
        "C. To create a basic UI for the tool using Streamlit.",
        "D. To use LangChain and ChromaDB for building LLM applications."
      ],
      "answer": "A. To generate cold emails to potential clients for software services companies."
    },
    {
      "question": "What is the name of the open-source LLM used in the project?",
      "options": [
        "A. LLaMA 3.1",
        "B. LangChain",
        "C. ChromaDB",
        "D. Streamlit"
      ],
      "answer": "A. LLaMA 3.1"
    }
  ]
}
