In [None]:
import google.generativeai as genai
import os
import requests
from IPython.display import display, HTML
from difflib import SequenceMatcher

# ================================
# CONFIG
# ================================
# Read the Gemini key from the environment instead of hard-coding it: the
# secret stays out of the source, and the error message below (which tells the
# user to set GEMINI_API_KEY) now describes something that actually works.
API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Fail fast with a clear message rather than letting the first API call fail.
if not API_KEY:
    raise ValueError("API key not found. Please set the GEMINI_API_KEY environment variable.")

genai.configure(api_key=API_KEY)

# Model used for every generate_content call in extract_topics.
MODEL_NAME = "gemini-2.5-flash-lite"

# ================================
# LOAD TXT FILE
# ================================
def load_text(file_path: str) -> str:
    """Return the full UTF-8 decoded contents of ``file_path``.

    A missing file is not treated as an error: an empty string is returned
    instead, so callers can handle "not found" and "empty" the same way.
    """
    try:
        handle = open(file_path, "r", encoding="utf-8")
    except FileNotFoundError:
        # Signal "nothing to read" to the caller instead of raising.
        return ""
    with handle:
        contents = handle.read()
    return contents

# ================================
# EXTRACT TOPICS
# ================================
def extract_topics(text: str, max_concepts: int = 10) -> tuple:
    """
    Uses the Gemini API to identify the top concepts in a textbook chapter and
    generates a detailed study breakdown for each one.

    Args:
        text: Full text of the textbook chapter.
        max_concepts: Upper bound on the number of concepts requested from the
            model and processed (default 10). Each concept costs one extra API
            call, so this also bounds the number of requests.

    Returns:
        A ``(concepts, detailed_outputs)`` tuple of two lists: the concept
        names, and the generated breakdown text for each concept in the same
        order.

    Raises:
        Whatever the Gemini client raises for a failed or blocked request; the
        caller is expected to handle it.
    """
    model = genai.GenerativeModel(MODEL_NAME)

    # Step 1: Get the list of top concepts first
    list_prompt = f"""
    Based on the following text, list the top {max_concepts} most important concepts for a 7th-grade student
    following the NCERT curriculum. Return the output as a simple, unnumbered list with each
    concept on a new line. Do not include any additional text or explanations.

    Text:
    {text}
    """

    list_response = model.generate_content(list_prompt)

    # The model sometimes returns markdown bullets or bold markers despite the
    # instructions; strip them so they do not leak into the concept names.
    concepts = []
    for raw_line in list_response.text.split('\n'):
        cleaned = raw_line.strip().strip("-*• \t")
        if cleaned:
            concepts.append(cleaned)
    # Never issue more detail requests than were asked for.
    concepts = concepts[:max_concepts]

    # Step 2: Generate a detailed breakdown for each concept.
    # Every generate_content call is an independent request with no memory of
    # the previous one, so the textbook text is appended to each prompt;
    # otherwise the "Key Topics from the Textbook" section has nothing to draw on.
    detailed_outputs = []
    for concept in concepts:
        detail_prompt = f"""
        Provide a comprehensive, structured breakdown for the NCERT 7th-grade concept: "{concept}".
        The content should be specifically tailored for a 7th-grade student.

        ### Concept: {concept}

        #### Description
        [Provide a simple, easy-to-understand description of the concept.]

        #### Key Topics from the Textbook
        [Based *only* on the textbook content included at the end of this prompt, provide a concise bulleted list of the specific topics and sub-topics that are directly related to this concept. Do not include any information that was not in the original text.]

        #### Detail
        [Elaborate on the key components and facts of the concept.]

        #### Working
        [Explain how the concept works or the process involved, step by step.]

        #### Intuition / Logical Flow
        [Describe the underlying logic or an intuitive way to understand the concept.]

        #### Critical Thinking
        [Pose a thought-provoking question that requires applying the concept creatively.]

        #### Open-Ended MCQs
        [Create 3-5 multiple-choice questions with a single correct answer.]

        #### Real-Life Applications
        [List 2-3 real-life examples where this concept is used or observed.]

        #### Relation Between Sub-Concepts
        [Explain how different parts or related ideas within this concept connect.]

        #### Cross-Concept Critical Thinking
        [Ask a question that links this concept to another from a different chapter or subject.]

        #### Exam-Oriented Questions
        [Provide 2-3 typical short/long answer questions asked in exams.]

        #### Metrics Estimation
        - **Estimated Avg time taken for the 1st time learners:** 10 min
        - **Estimated Avg time taken for the 2nd time learners:** 3 min
        - **# of revisions needed to master:** 3 (interval of revisit in days: 5)

        #### Importance
        - **Importance for the rest of the chapter:** [Estimate a percentage]
        - **Importance for the Olympiad:** [Estimate as Rare, Low, Medium, High]
        - **Importance for the IIT/NEET:** [Estimate as Very very rare, Rare, Low, Medium, High]
        - **Definition getting asked in the unit test:** [Estimate as Low, Medium, High]

        #### Which aspect is important?
        [e.g., Def/concept/word problem?]

        #### Should you remember this?
        [Yes/No, with a brief reason]

        ---

        #### Exam Preparation, Common Mistakes, Exam Tips
        - **Exam Preparation:** [Provide a tip for exam preparation for this concept.]
        - **Common Mistakes:** [List a common error students make with this concept.]
        - **Exam Tips:** [Provide a final exam-day tip for this concept.]

        ---

        #### Textbook Content (source material for "Key Topics from the Textbook"; do not repeat it in the answer)
        {text}
        """
        detail_response = model.generate_content(detail_prompt)
        detailed_outputs.append(detail_response.text)

    return concepts, detailed_outputs

# ================================
# MAIN
# ================================
if __name__ == "__main__":
    # Script entry point: load the chapter text, then print the concept list
    # followed by the detailed breakdown generated for each concept.
    file_path = "/content/gecu112.txt"
    text = load_text(file_path)

    if text:
        try:
            concepts, topics_breakdown = extract_topics(text)

            print("Top 10 Topics from the Textbook:")
            for number, concept in enumerate(concepts, start=1):
                print(f"{number}. {concept}")

            print("\n📌 Detailed Topic Breakdown:\n")
            separator = "-" * 50
            for breakdown in topics_breakdown:
                print(breakdown)
                print(separator)

        except Exception as err:
            # Top-level boundary: report any failure instead of a raw traceback.
            print(f"An error occurred: {err}")
    else:
        # load_text returns "" both for a missing file and for an empty one.
        print(f"Error: The file '{file_path}' was not found or is empty. Please check the path and content.")

In [None]:

import requests
from bs4 import BeautifulSoup
import re
from groq import Groq
import webbrowser

# Credentials for this cell: API_KEY is handed to the Groq client below, and
# YOUTUBE_API_KEY is read by fetch_youtube_video when it queries the search API.
API_KEY = ""
YOUTUBE_API_KEY = ""  # <-- Replace with your YouTube API key

# Build the shared Groq client once; every LLM helper in this cell uses it.
# If construction fails, report the problem and stop.
try:
    groq_client = Groq(api_key=API_KEY)
except Exception as init_error:
    print(f"Error initializing Groq client: {init_error}")
    exit()

# ---------------- LLM HELPERS ----------------

def get_llm_one_word_title(topic):
    """Ask the LLM for a one-word alternative to ``topic`` for use as a Wikipedia title.

    Returns the model's reply with surrounding whitespace removed. If the
    request fails for any reason, the error is printed and ``topic`` is
    returned unchanged so the caller always gets a usable title.
    """
    prompt = f"""
    Suggest a single one-word alternative for the topic "{topic}" that is likely to exist as a Wikipedia article title.
    Output only the word, nothing else.
    """
    conversation = [
        {"role": "system", "content": "Output only one word, no explanations."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0.3,
            max_tokens=10,
        )
        suggestion = completion.choices[0].message.content
        return suggestion.strip()
    except Exception as err:
        # Best-effort helper: fall back to the original topic on any failure.
        print(f"LLM error: {err}")
        return topic

def validate_caption_for_student(caption):
    """Ask the LLM whether an image caption suits a 7th standard NCERT student.

    Returns True only when the model's reply, trimmed and upper-cased, is
    exactly "VALID". Any other reply, or any error during the request (which
    is printed), yields False.
    """
    prompt = f"""
    You are a content validator for 7th standard NCERT students.
    Check the following image description:

    "{caption}"

    Criteria:
    1. Must be very simple and easy for a 7th standard NCERT student.
    2. Must not contain complex or advanced terms, jargon, or technical language.
    3. Must not contain explicit, violent, or inappropriate content.
    4. Must meaningfully describe the image in a basic way.

    Reply with only "VALID" if it meets these criteria, or "INVALID" otherwise.
    """
    conversation = [
        {"role": "system", "content": "You are a content validator for school students."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0,
            max_tokens=5,
        )
        verdict = completion.choices[0].message.content.strip().upper()
    except Exception as err:
        # Treat an unverifiable caption as rejected.
        print(f"LLM validation error: {err}")
        return False
    return verdict == "VALID"

def classify_image_to_subtopic(topic, description):
    """Ask the LLM to label an image description with a subtopic of ``topic``.

    Returns the model's reply with surrounding whitespace removed. If the
    request fails, the error is printed and the string "General" is returned.
    """
    prompt = f"""
    Classify the following image description into a subtopic of "{topic}".
    If unsure, just reply with the closest keyword from the topic.

    Description: "{description}"

    Reply with only the subtopic word/phrase, nothing else.
    """
    conversation = [
        {"role": "system", "content": "You are a subtopic classifier."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0.3,
            max_tokens=15,
        )
        label = completion.choices[0].message.content
        return label.strip()
    except Exception as err:
        # Fall back to a catch-all label so the caller always gets a subtopic.
        print(f"LLM classification error: {err}")
        return "General"

# ---------------- YOUTUBE HELPERS ----------------

def validate_youtube_description(topic, subtopic, description):
    """Ask the LLM whether a YouTube video description suits a 7th standard NCERT student.

    Returns True only when the model's reply, trimmed and upper-cased, is
    exactly "VALID". Any other reply, or any error during the request (which
    is printed), yields False.
    """
    prompt = f"""
    You are validating a YouTube video description for 7th standard NCERT students.

    Topic: {topic}
    Subtopic: {subtopic}
    Description: "{description}"

    Criteria:
    1. Must be relevant to the topic and subtopic.
    2. Must be simple and easy for a 7th standard NCERT student.
    3. Must not be too advanced or for higher classes.
    4. Must not contain spam, random, or promotional content.

    Reply with only "VALID" or "INVALID".
    """
    conversation = [
        {"role": "system", "content": "You are a YouTube video validator for school students."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = groq_client.chat.completions.create(
            messages=conversation,
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0,
            max_tokens=5,
        )
        verdict = completion.choices[0].message.content.strip().upper()
        return verdict == "VALID"
    except Exception as err:
        # Treat an unverifiable video as rejected.
        print(f"YouTube validation error: {err}")
        return False

def fetch_youtube_video(topic, subtopic):
    """Search YouTube for a class-7-appropriate video on ``topic`` / ``subtopic``.

    Queries the YouTube Data API search endpoint for up to five videos and
    returns the first one whose description passes
    ``validate_youtube_description``.

    Args:
        topic: Main topic used in the search query and in validation.
        subtopic: Subtopic used in the search query and in validation.

    Returns:
        A dict with keys ``"url"``, ``"title"`` and ``"description"`` for the
        first accepted video, or ``None`` when the search fails, returns a
        non-200 status, or no result is accepted.
    """
    search_query = f"{topic} {subtopic} for class 7 NCERT"
    endpoint = "https://www.googleapis.com/youtube/v3/search"
    # Pass the query through ``params`` so requests URL-encodes it; topic and
    # subtopic may contain characters such as "&", "#" or "'" that would
    # otherwise corrupt a hand-built query string.
    query_params = {
        "part": "snippet",
        "type": "video",
        "q": search_query,
        "key": YOUTUBE_API_KEY,
        "maxResults": 5,
    }

    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(endpoint, params=query_params, timeout=15)
        if response.status_code != 200:
            print(f"⚠️ YouTube search failed: {response.status_code}")
            return None

        for item in response.json().get("items", []):
            # Skip malformed items individually instead of letting one of
            # them abort the whole result list via the except below.
            video_id = (item.get("id") or {}).get("videoId")
            snippet = item.get("snippet") or {}
            description = (snippet.get("description") or "").strip()

            if not video_id or not description:
                continue

            if validate_youtube_description(topic, subtopic, description):
                video_url = f"https://www.youtube.com/watch?v={video_id}"
                return {
                    "url": video_url,
                    "title": snippet.get("title", ""),
                    "description": description,
                }

    except Exception as e:
        # Best-effort lookup: a missing video must not stop image processing.
        print(f"YouTube API error: {e}")

    return None

# ---------------- SCRAPER ----------------

def fetch_wikipedia_images(title, limit=None):
    """Scrape the English Wikipedia article for ``title`` and collect suitable images.

    For each sufficiently wide image hosted on upload.wikimedia.org, a caption
    is taken from the enclosing ``<figure>``, the legacy ``thumb`` container,
    or the ``alt`` / ``title`` attribute. Captions accepted by
    ``validate_caption_for_student`` are classified into a subtopic and paired
    with a YouTube video lookup.

    Args:
        title: Article title; spaces are replaced with underscores for the URL.
        limit: Optional maximum number of accepted images. Scanning stops as
            soon as this many have been collected, which avoids LLM and
            YouTube API calls for images the caller would discard anyway.
            ``None`` (the default) processes every image on the page.

    Returns:
        A list of dicts with keys ``'url'``, ``'description'``, ``'subtopic'``
        and ``'youtube'`` (a video dict or ``None``), in page order. The list
        is empty when the page cannot be fetched or no image is accepted.
    """
    formatted_title = title.replace(" ", "_")
    url = f"https://en.wikipedia.org/wiki/{formatted_title}"

    print(f"\n🔎 Attempting page: {url}")

    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=15)
        if response.status_code != 200:
            print(f"⚠️ Page not found: {title}")
            return []
    except requests.exceptions.RequestException:
        print(f"⚠️ Request error for: {title}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    all_images = []

    # Interface-chrome markers, matched as whole tokens of the URL. A plain
    # substring test would also reject legitimate files: "ui" occurs inside
    # "Equinox" and "edit" inside "Mediterranean".
    skip_tokens = {'icon', 'logo', 'ui', 'edit'}
    token_splitter = re.compile(r'[^a-z0-9]+')

    img_tags = soup.find_all('img', src=re.compile(r'//upload\.wikimedia\.org/'))
    for img in img_tags:
        try:
            width = int(img.get('width') or 0)
        except ValueError:
            width = 0
        # Narrow images are almost always icons or decorations.
        if width < 100:
            continue

        src = img.get('src')
        # Wikipedia normally emits protocol-relative URLs ("//upload..."), but
        # keep an already absolute URL intact instead of prefixing it again.
        img_url = src if src.startswith('http') else 'https:' + src

        # Cheap URL filter first, before any caption extraction.
        if skip_tokens.intersection(token_splitter.split(img_url.lower())):
            continue

        description = None

        # Preferred caption source: modern <figure>/<figcaption> markup.
        figure_tag = img.find_parent('figure')
        if figure_tag:
            figcaption = figure_tag.find('figcaption')
            if figcaption:
                description = figcaption.get_text(strip=True)

        # Fallback: legacy "thumb" container with a "thumbcaption" div.
        if not description:
            thumb_caption = img.find_parent('div', class_='thumb')
            if thumb_caption:
                caption_tag = thumb_caption.find('div', class_='thumbcaption')
                if caption_tag:
                    description = caption_tag.get_text(strip=True)

        # Last resort: the image's own alt or title attribute.
        if not description:
            description = img.get('alt') or img.get('title')

        if not description:
            continue

        if validate_caption_for_student(description):
            subtopic = classify_image_to_subtopic(title, description)
            yt_video = fetch_youtube_video(title, subtopic)
            all_images.append({
                'url': img_url,
                'description': description,
                'subtopic': subtopic,
                'youtube': yt_video
            })
            # Stop once enough images are collected; the rest would only
            # spend LLM calls and YouTube quota on discarded results.
            if limit is not None and len(all_images) >= limit:
                break
        else:
            print(f"❌ Rejected caption: {description}")

    return all_images


def get_wikipedia_images(topic, limit=6):
    """Return up to ``limit`` validated Wikipedia images for ``topic``.

    The article named ``topic`` is tried first. If it yields no valid images,
    the LLM is asked for a one-word alternative title and that article is
    tried instead.

    Args:
        topic: Topic name used as the Wikipedia article title.
        limit: Maximum number of images to return (default 6).

    Returns:
        A list of image dicts as produced by ``fetch_wikipedia_images``,
        truncated to ``limit`` entries.
    """
    # Only a positive limit is used for early stopping; any other value keeps
    # the full scan, so the final slice behaves exactly as it always did.
    early_stop = limit if limit and limit > 0 else None

    images = fetch_wikipedia_images(topic, limit=early_stop)

    if not images:
        print("\n⚠️ No valid images found. Trying LLM one-word alternative...")
        alt_title = get_llm_one_word_title(topic)
        print(f"💡 Using alternative title: {alt_title}")
        images = fetch_wikipedia_images(alt_title, limit=early_stop)

    return images[:limit]

# ---------------- MAIN ----------------
if __name__ == "__main__":
    # Script entry point: for every topic, collect validated Wikipedia images,
    # print their details (and any matched YouTube video), and open each image
    # in a new browser tab.
    topics = [
        "Earth's Rotation",
        "Day and Night Cycle",
        "Earth's Revolution",
        "Seasons",
        "Tilt of Earth's Axis",
        "Apparent Motion of Celestial Bodies",
        "Solar Eclipse",
        "Lunar Eclipse",
        "Solstice",
        "Equinox",
    ]

    for user_topic in topics:
        print("="*50)
        print(f"** Processing topic: {user_topic} **")
        images_data = get_wikipedia_images(user_topic, limit=6)

        if not images_data:
            print(f"\n❌ Could not find any valid images for '{user_topic}'.")
            continue

        print(f"\n✅ Found {len(images_data)} valid images for '{user_topic}':\n")
        for position, image in enumerate(images_data, start=1):
            video = image['youtube']
            print(f"--- Image {position} ---")
            print(f"**Image URL:** {image['url']}")
            print(f"**Description:** {image['description']}")
            print(f"**Subtopic:** {image['subtopic']}")
            # The video entry is None when no suitable video was found.
            if video:
                print(f"**YouTube Video:** {video['url']}")
                print(f"**Video Title:** {video['title']}")
                print(f"**Video Description:** {video['description']}\n")
            webbrowser.open_new_tab(image['url'])