In [1]:
from typing import Literal,Annotated
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph,MessagesState,START,END
from langgraph.types import Command
from dotenv import load_dotenv
from IPython.display import display, Image
from langchain_huggingface import ChatHuggingFace,HuggingFaceEndpoint
from langchain_core.messages import HumanMessage,AIMessage,SystemMessage
from langgraph.prebuilt import create_react_agent
from langchain_experimental.utilities import PythonREPL
from youtube_search import YoutubeSearch
from langchain_community.tools import DuckDuckGoSearchRun
import sqlite3
from langchain_tavily import TavilySearch
from langgraph_supervisor import create_supervisor

from langchain_community.utilities.semanticscholar import SemanticScholarAPIWrapper
from langchain.schema import HumanMessage
from typing import List, Dict
import requests
from io import BytesIO
from PyPDF2 import PdfReader
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough


In [2]:
load_dotenv()

True

In [3]:
groq_model=ChatGroq(model="deepseek-r1-distill-llama-70b")

In [43]:
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    do_sample=False,
    top_p=1.0,
    top_k=0,
    provider="auto"  # let Hugging Face choose the best provider for you
)
hf_model = ChatHuggingFace(llm=llm)
hf_model.invoke([HumanMessage(content="Hello World")])

AIMessage(content="Hello World! It's nice to meet you. Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 37, 'total_tokens': 63}, 'model_name': 'meta-llama/Llama-3.2-3B-Instruct', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--74e150be-88a8-478d-a36f-1f3a40902098-0', usage_metadata={'input_tokens': 37, 'output_tokens': 26, 'total_tokens': 63})

In [None]:
# llm = HuggingFaceEndpoint(
#     repo_id="openai/gpt-oss-20b",
#     task="text-generation",
#     do_sample=False,
#     top_p=1.0,
#     top_k=0,
#     provider="auto",  # let Hugging Face choose the best provider for you
# )

# hf_model = ChatHuggingFace(llm=llm)

In [5]:
# groq_model.invoke([HumanMessage(content="hii how are you?")])

In [6]:
@tool
def youtube_search(query: str) -> str:
    """
    Search YouTube for videos related to the query and return top 3 video links with titles.
    """
    import requests
    API_KEY = "AIzaSyBNBTgze_5FR5VHzfZlxc38iLwr7xyYaHE"
    url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": 3,
        "key": API_KEY
    }
    res = requests.get(url, params=params).json()
    items = res.get("items", [])
    videos = [
        f"{item['snippet']['title']} - https://www.youtube.com/watch?v={item['id']['videoId']}"
        for item in items if item.get('id', {}).get('videoId')
    ]
    if not videos:
        return "No related videos found."
    return "\n".join(videos)



In [7]:

@tool
def generate_resume(job_description: str) -> str:
    """
    Generate a structured resume and cover letter based on the job description.
    Returns output in clean, formatted HTML for easy display.
    """
    
    html_template = f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.5; }}
            h2 {{ color: #2E86C1; }}
            h3 {{ color: #117A65; }}
            p {{ margin-bottom: 10px; }}
            .section {{ margin-bottom: 20px; }}
            .highlight {{ color: #D35400; font-weight: bold; }}
        </style>
    </head>
    <body>
        <div class="section">
            <h2>📄 Resume - Tailored for: {job_description}</h2>
            <h3>Summary</h3>
            <p>Experienced professional with skills and expertise matching <span class="highlight">{job_description}</span>. 
            Strong problem-solving abilities and a passion for excellence in the relevant field.</p>
        </div>
        <div class="section">
            <h3>Skills</h3>
            <p>• Core skills relevant to {job_description}<br>
               • Additional complementary skills<br>
               • Technical & soft skills</p>
        </div>
        <div class="section">
            <h3>Experience</h3>
            <p>• Previous roles and achievements tailored to {job_description}<br>
               • Demonstrated impact and measurable results</p>
        </div>
        <div class="section">
            <h2>✉️ Cover Letter</h2>
            <p>Dear Hiring Manager,</p>
            <p>I am excited to apply for the <span class="highlight">{job_description}</span> role. 
            With my background, skills, and passion for this field, I am confident in delivering impactful results. 
            I look forward to contributing to your team’s success.</p>
            <p>Best regards,<br>Your Name</p>
        </div>
    </body>
    </html>
    """
    return html_template


In [8]:
def extract_text_from_pdf(url: str, max_pages: int = 5) -> str:
    try:
        response = requests.get(url, timeout=15)
        if response.status_code != 200:
            return ""
        pdf_file = BytesIO(response.content)
        reader = PdfReader(pdf_file)
        text = [page.extract_text() or "" for page in reader.pages[:max_pages]]
        return "\n".join(text)
    except Exception as e:
        return f"Error extracting PDF: {e}"

# -------------------------
# Initialize Semantic Scholar API
# -------------------------
ss = SemanticScholarAPIWrapper(
    top_k_results=5,
    load_max_docs=5
)

# -------------------------
# Unified Semantic Scholar Tool
# -------------------------
@tool
def semantic_scholar_research(query: str, summarize: bool = True) -> List[Dict]:
    """
    Fetch top research papers from Semantic Scholar for a query.
    - Optionally summarize abstracts using LLM.
    - Attempt PDF extraction if URL is provided.
    """
    raw_results = ss.run(query)  # Returns string with paper info
    papers = raw_results.split("\n\n")
    summarized_papers = []

    for paper in papers:
        summary = ""
        pdf_summary = ""
        # Summarize abstract
        if summarize and "abstract:" in paper.lower():
            prompt = f"Summarize this research abstract in 2-3 sentences:\n\n{paper}"
            summary = hf_model.invoke([HumanMessage(content=prompt)]).content

        # Attempt PDF extraction if a PDF link is in text
        if "http" in paper and ".pdf" in paper.lower():
            pdf_url = paper.split("http")[1].split()[0]
            pdf_url = "http" + pdf_url
            pdf_text = extract_text_from_pdf(pdf_url)
            if pdf_text and summarize:
                pdf_prompt = f"Summarize this PDF content (first 500 words):\n\n{pdf_text[:3000]}"
                pdf_summary = hf_model.invoke([HumanMessage(content=pdf_prompt)]).content

        summarized_papers.append({
            "raw_info": paper,
            "summary": summary,
            "pdf_summary": pdf_summary
        })

    return summarized_papers

In [9]:

# -----------------------------
# Prompt Template
# -----------------------------
prompt_template = """
You are a helpful assistant designed to answer questions about a YouTube video link taht have been provided based on its transcript.
Answer the user's question using ONLY the provided transcript context.
If the information is not in the context, explicitly say "I cannot find information about that in the video transcript."

Transcript:
{context}

Question:
{question}

Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# -----------------------------
# Transcript Fetcher
# -----------------------------
def get_transcript(video_id: str):
    """Fetch transcript safely for old and new versions of youtube_transcript_api."""
    try:
        transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["hi"])
    except NoTranscriptFound:
        try:
            transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
        except NoTranscriptFound:
            return None, "No transcript available in Hindi or English."
    except (TranscriptsDisabled, VideoUnavailable) as e:
        return None, str(e)
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

    # Handle both dicts and FetchedTranscriptSnippet objects
    texts = []
    for snippet in transcript_list:
        if isinstance(snippet, dict):
            texts.append(snippet.get("text", ""))
        else:
            # FetchedTranscriptSnippet object
            texts.append(getattr(snippet, "text", ""))

    transcript = " ".join(texts)
    return transcript, None


# -----------------------------
# Main YouTube QA / Summarizer
# -----------------------------
@tool
def youtube_qa(video_url: str, question: str):
    """Given a YouTube URL and question, return answer based on transcript."""
    try:
        video_id = video_url.split("v=")[-1].split("&")[0]  # Extract only the video ID
        transcript, error = get_transcript(video_id)
        if error:
            return error

        rag_runnable = (
            {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
            | prompt
            | hf_model
        )
        answer = rag_runnable.invoke({"context": transcript, "question": question})
        return answer

    except Exception as e:
        return f"Error: {str(e)}"

In [10]:
duck_tool = DuckDuckGoSearchRun()
tavile_tool = TavilySearch()

In [53]:
# ─────────────────────────────────────────────────────────────────────────────
# Network-Agent Orchestration of Your Multi-Agent System (LLM-classified router)
# ─────────────────────────────────────────────────────────────────────────────

from typing import List, Dict
import re
import requests
from io import BytesIO

from dotenv import load_dotenv
from PyPDF2 import PdfReader

# LangChain / LangGraph
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_groq import ChatGroq

from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.prebuilt import create_react_agent
from langgraph.types import Command

# Tools / APIs
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_tavily import TavilySearch
from langchain_community.utilities.semanticscholar import SemanticScholarAPIWrapper

from youtube_transcript_api import (
    YouTubeTranscriptApi,
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
)

# ─────────────────────────────────────────────────────────────────────────────
# Env & Models
# ─────────────────────────────────────────────────────────────────────────────

load_dotenv()

# Optional: Groq model (not required by router, but kept as in your code)
hf_model = ChatGroq(model="deepseek-r1-distill-llama-70b")

# Primary chat model (HF)
# llm = HuggingFaceEndpoint(
#     repo_id="meta-llama/Llama-3.2-3B-Instruct",
#     task="text-generation",
#     do_sample=False,
#     top_p=1.0,
#     top_k=0,
#     provider="auto",
# )
# hf_model = ChatHuggingFace(llm=llm)

# Quick sanity ping (can be commented out)
_ = hf_model.invoke([HumanMessage(content="Hello World")])


# ─────────────────────────────────────────────────────────────────────────────
# Tools
# ─────────────────────────────────────────────────────────────────────────────

# @tool
# def youtube_search(query: str) -> str:
#     """
#     Search YouTube for videos related to the query and return top 3 video links with titles.
#     """
#     API_KEY = "AIzaSyBNBTgze_5FR5VHzfZlxc38iLwr7xyYaHE"  # (kept as given)
#     url = "https://www.googleapis.com/youtube/v3/search"
#     params = {
#         "part": "snippet",
#         "q": query,
#         "type": "video",
#         "maxResults": 3,
#         "key": API_KEY,
#     }
#     res = requests.get(url, params=params).json()
#     items = res.get("items", [])
#     videos = [
#         f"{item['snippet']['title']} - https://www.youtube.com/watch?v={item['id']['videoId']}"
#         for item in items
#         if item.get("id", {}).get("videoId")
#     ]
#     if not videos:
#         return "No related videos found."
#     return "\n".join(videos)
# 
API_KEY = "AIzaSyBNBTgze_5FR5VHzfZlxc38iLwr7xyYaHE"

def youtube_search_guaranteed(query: str, max_results=3):
    # Step 1: search videos
    search_url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": 20,  # search more for filtering
        "key": API_KEY,
    }
    search_res = requests.get(search_url, params=params).json()
    items = search_res.get("items", [])
    video_ids = [item['id']['videoId'] for item in items if item.get('id', {}).get('videoId')]
    if not video_ids:
        return []

    # Step 2: get video details to filter valid ones
    details_url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "status,snippet",
        "id": ",".join(video_ids),
        "key": API_KEY,
    }
    details_res = requests.get(details_url, params=params).json()
    valid_videos = []

    for video in details_res.get("items", []):
        status = video.get("status", {})
        if status.get("uploadStatus") != "processed":
            continue
        if status.get("privacyStatus") != "public":
            continue
        snippet = video.get("snippet", {})
        title = snippet.get("title")
        vid_id = video.get("id")
        link = f"https://www.youtube.com/watch?v={vid_id}"
        valid_videos.append(f"{title} - {link}")
        if len(valid_videos) >= max_results:
            break

    return valid_videos

# ─────────────── LLM-based Topic Extractor ───────────────


@tool
def youtube_search_agent(query: str) -> str:
    """
    Deterministically extract main keywords and fetch top 3 valid YouTube videos.
    """
    import re

    # Step 1: Extract main topic with simple heuristic (first few nouns / keywords)
    # For example, remove "Explain", "Give me", etc.
    cleaned_query = re.sub(
        r"\b(explain|give me|show|video|tutorial|about|for|the|a|an)\b", "", query, flags=re.I
    )
    # Keep only the first 5–7 words
    keywords = " ".join(cleaned_query.split()[:7])

    # Step 2: Search YouTube
    videos = youtube_search_guaranteed(keywords)
    if not videos:
        return "No valid YouTube videos found for this query."
    return "\n".join(videos)



# ─────────────── Topic Explanation Tool ───────────────
@tool
def topic_explanation(query: str) -> str:
    """ Return a concise conceptual explanation of the topic. """
    prompt = f"Explain the concept of '{query}' in 3-5 sentences in a clear, beginner-friendly way."
    return hf_model.invoke([HumanMessage(content=prompt)]).content


@tool
def generate_resume(job_description: str) -> str:
    """
    Generate a structured resume and cover letter based on the job description.
    Returns output in clean, formatted HTML for easy display.
    """
    html_template = f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.5; }}
            h2 {{ color: #2E86C1; }}
            h3 {{ color: #117A65; }}
            p {{ margin-bottom: 10px; }}
            .section {{ margin-bottom: 20px; }}
            .highlight {{ color: #D35400; font-weight: bold; }}
        </style>
    </head>
    <body>
        <div class="section">
            <h2>📄 Resume - Tailored for: {job_description}</h2>
            <h3>Summary</h3>
            <p>Experienced professional with skills and expertise matching <span class="highlight">{job_description}</span>. 
            Strong problem-solving abilities and a passion for excellence in the relevant field.</p>
        </div>
        <div class="section">
            <h3>Skills</h3>
            <p>• Core skills relevant to {job_description}<br>
               • Additional complementary skills<br>
               • Technical & soft skills</p>
        </div>
        <div class="section">
            <h3>Experience</h3>
            <p>• Previous roles and achievements tailored to {job_description}<br>
               • Demonstrated impact and measurable results</p>
        </div>
        <div class="section">
            <h2>✉️ Cover Letter</h2>
            <p>Dear Hiring Manager,</p>
            <p>I am excited to apply for the <span class="highlight">{job_description}</span> role. 
            With my background, skills, and passion for this field, I am confident in delivering impactful results. 
            I look forward to contributing to your team’s success.</p>
            <p>Best regards,<br>Your Name</p>
        </div>
    </body>
    </html>
    """
    return html_template


def extract_text_from_pdf(url: str, max_pages: int = 5) -> str:
    try:
        response = requests.get(url, timeout=15)
        if response.status_code != 200:
            return ""
        pdf_file = BytesIO(response.content)
        reader = PdfReader(pdf_file)
        text = [page.extract_text() or "" for page in reader.pages[:max_pages]]
        return "\n".join(text)
    except Exception as e:
        return f"Error extracting PDF: {e}"


# Semantic Scholar wrapper
ss = SemanticScholarAPIWrapper(
    top_k_results=5,
    load_max_docs=5
)

@tool
def semantic_scholar_research(query: str, summarize: bool = True) -> List[Dict]:
    """
    Fetch top research papers from Semantic Scholar for a query.
    - Optionally summarize abstracts using LLM.
    - Attempt PDF extraction if URL is provided.
    """
    raw_results = ss.run(query)  # returns string with paper info (lib’s format)
    papers = raw_results.split("\n\n")
    summarized_papers = []

    for paper in papers:
        summary = ""
        pdf_summary = ""
        if summarize and "abstract:" in paper.lower():
            prompt = f"Summarize this research abstract in 2-3 sentences:\n\n{paper}"
            summary = hf_model.invoke([HumanMessage(content=prompt)]).content

        if "http" in paper and ".pdf" in paper.lower():
            pdf_url = "http" + paper.split("http")[1].split()[0]
            pdf_text = extract_text_from_pdf(pdf_url)
            if pdf_text and summarize:
                pdf_prompt = f"Summarize this PDF content (first 500 words):\n\n{pdf_text[:3000]}"
                pdf_summary = hf_model.invoke([HumanMessage(content=pdf_prompt)]).content

        summarized_papers.append(
            {
                "raw_info": paper,
                "summary": summary,
                "pdf_summary": pdf_summary,
            }
        )

    return summarized_papers


# ─────────────────────────────────────────────────────────────────────────────
# YouTube QA Tooling
# ─────────────────────────────────────────────────────────────────────────────

yt_prompt_template = """
You are a helpful assistant designed to answer questions about a YouTube video based on its transcript.
Answer the user's question using ONLY the provided transcript context.
If the information is not in the context, explicitly say "I cannot find information about that in the video transcript."

Transcript:
{context}

Question:
{question}

Answer:
"""
yt_prompt = PromptTemplate(
    template=yt_prompt_template, input_variables=["context", "question"]
)

def get_transcript(video_id: str):
    """Fetch transcript safely for old and new versions of youtube_transcript_api."""
    try:
        transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["hi"])
    except NoTranscriptFound:
        try:
            transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
        except NoTranscriptFound:
            return None, "No transcript available in Hindi or English."
    except (TranscriptsDisabled, VideoUnavailable) as e:
        return None, str(e)
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

    texts = []
    for snippet in transcript_list:
        if isinstance(snippet, dict):
            texts.append(snippet.get("text", ""))
        else:
            texts.append(getattr(snippet, "text", ""))
    transcript = " ".join(texts)
    return transcript, None


@tool
def youtube_qa(video_url: str, question: str):
    """Given a YouTube URL and question, return answer based on transcript."""
    try:
        video_id = video_url.split("v=")[-1].split("&")[0]
        transcript, error = get_transcript(video_id)
        if error:
            return error

        rag_runnable = (
            {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
            | yt_prompt
            | hf_model
        )
        answer = rag_runnable.invoke({"context": transcript, "question": question})
        return answer
    except Exception as e:
        return f"Error: {str(e)}"


# ─────────────────────────────────────────────────────────────────────────────
# External Search Tools
# ─────────────────────────────────────────────────────────────────────────────

duck_tool = DuckDuckGoSearchRun()
tavily_tool = TavilySearch()


# ─────────────────────────────────────────────────────────────────────────────
# Agents (unchanged behavior, same prompts & tools)
# ─────────────────────────────────────────────────────────────────────────────

unified_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_qa, semantic_scholar_research],
    name="unified_agent",
    prompt="""
You are UNIFIED-AGENT, capable of handling YouTube transcript QA and research paper summarization.
- If a valid YouTube link is provided, use youtube_qa to answer or summarize the transcript.
- If a research query or PDF link is provided, use semantic_scholar_research to fetch and summarize papers.
- Indicate the source clearly (video or research paper).
- If info is unavailable, say so explicitly.
- Always provide concise, clear, and user-friendly answers.
"""
)


# @tool
# def topic_explanation(query: str) -> str:
#     """
#     Return a concise conceptual explanation of the topic.
#     """
#     prompt = f"Explain the concept of '{query}' in 3-5 sentences in a clear, beginner-friendly way."
#     return hf_model.invoke([HumanMessage(content=prompt)]).content

# Now the curriculum agent only orchestrates:
# curriculum_agent = create_react_agent(
#     model=hf_model,
#     tools=[duck_tool, youtube_search, topic_explanation],
#     name="curriculum_agent",
#     prompt="""
# You are CURRICULUM-GUIDE, a knowledgeable educational expert.

# Instructions:
# 1. Use the `topic_explanation` tool to generate a clear textual explanation of the user's query.
# 2. Use the `youtube_search` tool to fetch **real, top 3 YouTube video links** for the topic.
# 3. Provide optional learning steps after explanation and videos.
# 4. Format output exactly like this:

# Explanation:
# [Use topic_explanation output] and if link is not find then not return this type somethng below-' [Exact URL i]'

# YouTube Videos:
# 1. [Exact title 1] - [Exact URL 1]
# 2. [Exact title 2] - [Exact URL 2]
# 3. [Exact title 3] - [Exact URL 3]

# Learning Steps (optional):
# 1. Step one
# 2. Step two
# 3. Step three

# Always:
# - Use **actual video links** returned by `youtube_search`, never invented links.
# - Keep explanation and steps concise, clear, and informative.
# """
# )
curriculum_agent = create_react_agent(
    model=hf_model,
    tools=[topic_explanation, youtube_search_agent],
    name="curriculum_agent",
    prompt="""
You are CURRICULUM-GUIDE, a knowledgeable educational expert.

Instructions:
1. Use topic_explanation to generate a clear explanation.
2. Use youtube_search_agent to fetch **top 3 valid YouTube videos**.
3. Provide optional learning steps after explanation.
4. Never hallucinate video links. If youtube_search_agent returns empty, skip the YouTube section.

Format output like this:

Explanation:
[Explanation text]

YouTube Videos:
1. [Video 1 title] - [Video 1 URL]
2. [Video 2 title] - [Video 2 URL]
3. [Video 3 title] - [Video 3 URL]

Learning Steps (optional):
1. Step one
2. Step two
3. Step three
"""
)


debug_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_search, duck_tool, tavily_tool],
    name="debug_educational_agent",
    prompt="""
You are CODE-TUTOR, a friendly and knowledgeable AI teacher and coding mentor. 
When a student asks a question or submits code, follow these rules:

1. **Explain concepts clearly** in an educational and beginner-friendly manner.
2. **Debug the code**: identify errors, explain why they occur, and suggest precise fixes.
3. **Provide learning resources**: include relevant YouTube links or tutorials . 
4. **Give examples, exercises, or mini-quizzes** when helpful to reinforce understanding.
5. **Motivate the student**: encourage practice, exploration, and curiosity.
6. **Step-by-step guidance**: never give one-line answers; always walk the student through solutions.
7. **Format output clearly**: use numbered steps, bullet points, or code blocks where applicable.

Goal: Deliver comprehensive, interactive, and motivational educational guidance with youtube video.
"""
)

resume_agent = create_react_agent(
    model=hf_model,
    tools=[generate_resume],
    name="resume_agent",
    prompt="""
You are RESUME-GUIDE, an expert in generating resumes and cover letters. Your task is to produce
a high-quality, professional, structured resume and cover letter for the given job description.

Rules:
1. Use the provided generate_resume tool for producing HTML output.
2. Tailor content dynamically to match the job description.
3. Ensure output is balanced: concise yet informative, highlighting key skills, achievements, and fit.
4. Internally iterate if needed to refine clarity, professionalism, and formatting.
5. Return only the **final polished HTML output** without any internal messages.

Sections to include:
- Summary
- Skills
- Experience
- Cover Letter

Goal: Generate a professional, polished, and ready-to-use resume + cover letter.
"""
)

news_agent = create_react_agent(
    model=hf_model,
    tools=[tavily_tool, duck_tool, youtube_search],
    name="news_agent",
    prompt="""
You are TECH-TREND-GUIDE, a proactive and knowledgeable technology analyst. Your task:

1. Fetch the latest and trending technology news from multiple sources dynamically.
2. Summarize each news point clearly in **point-wise format** for quick reading.
3. Highlight **emerging tools, frameworks, and technologies** mentioned or relevant.
4. Include **YouTube videos** if they are educational, relevant, and verified. If no video is available, skip gracefully.
5. Provide **actionable insights or recommendations** wherever possible.
6. Decide intelligently which tool (tavily_tool, duck_tool, or youtube_search) to use for each query.
7. Keep the summary **concise, engaging, and readable**; avoid overly long paragraphs.
8. Return **only the final summarized news**, do not include internal reasoning or tool calls.

Example output format:
- News Point 1: [Brief summary]  
  Video (if available): [YouTube link]  
- News Point 2: [Brief summary]  
  Video (if available): [YouTube link]  
- Trending Tools/Tech: [List of tools or technologies]  
- Recommendation: [Optional actionable insight]
"""
)

# ─────────────────────────────────────────────────────────────────────────────
# LLM-based Intent Classifier for Routing
# ─────────────────────────────────────────────────────────────────────────────

_ALLOWED_INTENTS = ["curriculum", "debug", "resume", "unified", "news"]

_CLASSIFY_SYSTEM = SystemMessage(
    content=(
        "You are a router that classifies user messages into one of these intents:\n"
        f"{', '.join(_ALLOWED_INTENTS)}.\n"
        "Return ONLY the intent name (single lowercase word). No punctuation, no extra text."
    )
)

def classify_intent(user_text: str) -> List[str]:
    """
    Classify user query into one or more agents.
    Returns a list of intents for multi-step queries.
    """
    prompt = f"""
You are a dynamic intent classifier for a multi-agent system.
You must determine which agent(s) should handle the user's query.

Agents:
1. curriculum - learning guides, roadmaps, explanations
2. debug - code debugging, errors, analysis
3. resume - resumes, cover letters, CVs
4. unified - research papers, PDFs, YouTube QA, summarization
5. news - tech news, trends, updates

Instructions:
- Analyze the user's query carefully.
- Return a COMMA-SEPARATED list of agent names that should be invoked in order.
- Return only valid agent names, lowercase, no punctuation.
- Examples:
  "Create a resume and a learning roadmap" -> "resume,curriculum"
  "Summarize a YouTube video on AI" -> "unified"
  "Fix Python code with KeyError" -> "debug"

User Query:
\"\"\"{user_text}\"\"\"

Agent(s):
"""
    msg = HumanMessage(content=prompt)
    try:
        response = hf_model.invoke([_CLASSIFY_SYSTEM, msg]).content.strip().lower()
        # Clean and split
        intents = [i.strip() for i in response.split(",") if i.strip() in _ALLOWED_INTENTS]
        return intents if intents else ["curriculum"]
    except Exception:
        return ["curriculum"]


# ─────────────────────────────────────────────────────────────────────────────
# Router Node + Graph (Network Pattern)
# ─────────────────────────────────────────────────────────────────────────────

# We’ll use MessagesState so messages flow naturally between nodes.
builder = StateGraph(MessagesState)

def router_node(state: MessagesState) -> MessagesState:
    """No-op node; conditional edges below decide the next hop."""
    return state


import re

def extract_youtube_url(text: str) -> str:
    """Extract first YouTube URL from the text, or return None."""
    match = re.search(r"(https?://www\.youtube\.com/watch\?v=[\w-]+)", text)
    return match.group(1) if match else None


def preprocess_message_for_unified(message: str) -> str:
    url = extract_youtube_url(message)
    if url:
        question = message.replace(url, "").strip()
        # Format as structured call for youtube_qa
        return f"youtube_qa video_url={url} question={question}"
    return message


def route_choice(state: MessagesState) -> str:
    last_user = ""
    for m in reversed(state["messages"]):
        if getattr(m, "type", None) == "human" or getattr(m, "role", "") == "user":
            last_user = m.content if hasattr(m, "content") else (m.get("content", "") if isinstance(m, dict) else "")
            break

    intent = classify_intent(last_user)
    mapping = {
        "curriculum": "curriculum_agent",
        "debug": "debug_agent",
        "resume": "resume_agent",
        "unified": "unified_agent",
        "news": "news_agent",
    }

    if intent == "unified":
        # Preprocess only for unified
        last_user = preprocess_message_for_unified(last_user)
        state["messages"].append(HumanMessage(content=last_user))

    # For resume, curriculum, debug, news -> do nothing extra
    return mapping.get(intent, "curriculum_agent")


# Nodes
builder.add_node("router", router_node)

# Agent nodes (callables from create_react_agent)
builder.add_node("curriculum_agent", curriculum_agent)
builder.add_node("debug_agent", debug_agent)
builder.add_node("resume_agent", resume_agent)
builder.add_node("unified_agent", unified_agent)
builder.add_node("news_agent", news_agent)

# Flow
builder.add_edge(START, "router")
builder.add_conditional_edges(
    "router",
    route_choice,
    {
        "curriculum_agent": "curriculum_agent",
        "debug_agent": "debug_agent",
        "resume_agent": "resume_agent",
        "unified_agent": "unified_agent",
        "news_agent": "news_agent",
    },
)

# End after each agent completes
builder.add_edge("curriculum_agent", END)
builder.add_edge("debug_agent", END)
builder.add_edge("resume_agent", END)
builder.add_edge("unified_agent", END)
builder.add_edge("news_agent", END)

# Compile app
network_app = builder.compile()

# ─────────────────────────────────────────────────────────────────────────────
# Example: invoke
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Example 1: curriculum-style query
    q1 = "Create a resume and cover letter for a Software Engineer internship"
    result1 = network_app.invoke({"messages": [{"role": "user", "content": q1}]})
    for m in result1["messages"]:
        if hasattr(m, "content"):
            print(m.content)
            print("\n" + "=" * 80 + "\n")



TypeError: unhashable type: 'list'

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# Dynamic Multi-Agent Network (Context-Aware Router)
# ─────────────────────────────────────────────────────────────────────────────

from typing import List
import re
import requests
from io import BytesIO
import json

from dotenv import load_dotenv
from PyPDF2 import PdfReader

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import ChatHuggingFace
from langchain_groq import ChatGroq

from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.prebuilt import create_react_agent

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_tavily import TavilySearch
from langchain_community.utilities.semanticscholar import SemanticScholarAPIWrapper
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable

# ─────────────────────────────────────────────────────────────────────────────
# Env & Models
# ─────────────────────────────────────────────────────────────────────────────

load_dotenv()
# hf_model = ChatGroq(model="deepseek-r1-distill-llama-70b")
# _ = hf_model.invoke([HumanMessage(content="Hello World")])

# ─────────────────────────────────────────────────────────────────────────────
# Tools
# ─────────────────────────────────────────────────────────────────────────────

API_KEY = "AIzaSyBNBTgze_5FR5VHzfZlxc38iLwr7xyYaHE"

def youtube_search_guaranteed(query: str, max_results=3):
    search_url = "https://www.googleapis.com/youtube/v3/search"
    params = {"part": "snippet", "q": query, "type": "video", "maxResults": 20, "key": API_KEY}
    search_res = requests.get(search_url, params=params).json()
    items = search_res.get("items", [])
    video_ids = [item['id']['videoId'] for item in items if item.get('id', {}).get('videoId')]
    if not video_ids:
        return []

    details_url = "https://www.googleapis.com/youtube/v3/videos"
    params = {"part": "status,snippet", "id": ",".join(video_ids), "key": API_KEY}
    details_res = requests.get(details_url, params=params).json()
    valid_videos = []

    for video in details_res.get("items", []):
        status = video.get("status", {})
        if status.get("uploadStatus") != "processed" or status.get("privacyStatus") != "public":
            continue
        snippet = video.get("snippet", {})
        title = snippet.get("title")
        vid_id = video.get("id")
        valid_videos.append(f"{title} - https://www.youtube.com/watch?v={vid_id}")
        if len(valid_videos) >= max_results:
            break
    return valid_videos

@tool
def youtube_search_agent(query: str) -> str:
    cleaned_query = re.sub(r"\b(explain|give me|show|video|tutorial|about|for|the|a|an)\b", "", query, flags=re.I)
    keywords = " ".join(cleaned_query.split()[:7])
    videos = youtube_search_guaranteed(keywords)
    if not videos:
        return "No valid YouTube videos found."
    return "\n".join(videos)

@tool
def topic_explanation(query: str) -> str:
    prompt = f"Explain the concept of '{query}' in 3-5 sentences, clearly and beginner-friendly."
    return hf_model.invoke([HumanMessage(content=prompt)]).content

@tool
def resume_tool(user_query: str, existing_resume: str = None) -> str:
    """
    Dynamically decide whether to:
    1. Generate a new resume + cover letter (if no existing_resume provided)
    2. Review and provide feedback on an existing resume (if existing_resume provided)
    """
    if existing_resume:
        mode = "review"
    else:
        mode = "create"

    if mode == "create":
        html_template = f"""
        <html><body>
        <h2>Resume - Tailored for: {user_query}</h2>
        <h3>Summary</h3><p>Experienced professional with skills matching {user_query}.</p>
        <h3>Skills</h3><p>• Core skills relevant to {user_query}</p>
        <h3>Experience</h3><p>• Tailored achievements and impact.</p>
        <h3>Cover Letter</h3><p>Dear Hiring Manager, I am excited to apply for {user_query}...</p>
        </body></html>
        """
        return html_template
    else:
        prompt = f"""
You are a professional resume reviewer. Analyze the following resume and provide:
1. Key improvements needed
2. Formatting & clarity feedback
3. Skills & experience suggestions

Resume Content:
{existing_resume}

Return a structured, concise, and constructive feedback.
"""
        feedback = hf_model.invoke([HumanMessage(content=prompt)]).content
        return feedback

def extract_text_from_pdf(url: str, max_pages: int = 5) -> str:
    try:
        response = requests.get(url, timeout=15)
        if response.status_code != 200: return ""
        reader = PdfReader(BytesIO(response.content))
        text = [page.extract_text() or "" for page in reader.pages[:max_pages]]
        return "\n".join(text)
    except Exception: return ""

ss = SemanticScholarAPIWrapper(top_k_results=5, load_max_docs=5)

@tool
def semantic_scholar_research(query: str, summarize: bool = True) -> list:
    
    raw_results = ss.run(query)
    papers = raw_results.split("\n\n")
    summarized = []
    for paper in papers:
        summary, pdf_summary = "", ""
        if summarize and "abstract:" in paper.lower():
            summary = hf_model.invoke([HumanMessage(content=f"Summarize this abstract in 2-3 sentences:\n\n{paper}")]).content
        if "http" in paper and ".pdf" in paper.lower():
            pdf_url = "http" + paper.split("http")[1].split()[0]
            pdf_text = extract_text_from_pdf(pdf_url)
            if pdf_text and summarize:
                pdf_summary = hf_model.invoke([HumanMessage(content=f"Summarize PDF first 500 words:\n\n{pdf_text[:3000]}")]).content
        summarized.append({"raw_info": paper, "summary": summary, "pdf_summary": pdf_summary})
    return summarized

# ─────────────────────────────────────────────────────────────────────────────
# Agents
# ─────────────────────────────────────────────────────────────────────────────

duck_tool = DuckDuckGoSearchRun()
tavily_tool = TavilySearch()

curriculum_agent = create_react_agent(
    model=hf_model,
    tools=[topic_explanation, youtube_search_agent],
    name="curriculum_agent",
    prompt="""
You are CURRICULUM-GUIDE, an expert in learning and concept explanations.

Rules:
1. Use topic_explanation to provide clear step-by-step conceptual explanations.
2. Use youtube_search_agent to fetch **top 3 valid YouTube videos**.
3. Include optional learning steps if helpful.
4. Dynamically handle queries: if the user asks for tutorials, learning paths, or conceptual understanding, provide a concise, structured response.
5. Never hallucinate video links. Skip YouTube section if none found.
6. Return only the **final output**, well-formatted and readable.
"""
)
debug_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_search_agent, duck_tool, tavily_tool],
    name="debug_agent",
    prompt="""
You are CODE-TUTOR, a friendly AI coding mentor.

Rules:
1. Dynamically detect whether the query is:
   - Code debugging
   - Explaining a programming concept
   - Providing exercises/examples
2. Explain concepts step-by-step, clearly and beginner-friendly.
3. Suggest precise fixes if code errors exist.
4. Attach relevant YouTube tutorials or resources dynamically.
5. Include mini-exercises or examples if relevant.
6. Always return **final output**, structured, and concise.
"""
)
resume_agent = create_react_agent(
    model=hf_model,
    tools=[resume_tool],
    name="resume_agent",
    prompt="""
You are RESUME-GUIDE. Your task is to dynamically decide if the user wants to:
1. Create a resume + cover letter
2. Review an existing resume for improvements

Rules:
- If the user provides an existing resume, call resume_tool in 'review' mode.
- Otherwise, call resume_tool in 'create' mode using job description in the query.
- Output should be polished, structured, and user-friendly.
- Always return only the final output (HTML or text feedback).
"""
)
unified_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_search_agent, semantic_scholar_research],
    name="unified_agent",
    prompt="""
You are UNIFIED-AGENT, capable of handling:
- Research papers
- Academic queries
- YouTube transcript QA
- Content summarization

Rules:
1. Detect if a query contains a YouTube link → call youtube_qa.
2. Detect if a query is research-focused → call semantic_scholar_research.
3. Summarize clearly, citing sources explicitly.
4. If no info is available, respond clearly.
5. Always return **concise, user-friendly output**.
"""
)
news_agent = create_react_agent(
    model=hf_model,
    tools=[tavily_tool, duck_tool, youtube_search_agent],
    name="news_agent",
    prompt="""
You are TECH-TREND-GUIDE, an expert in technology news and trends.

Rules:
1. Dynamically detect tech news or trend queries.
2. Fetch latest updates via tavily_tool or duck_tool.
3. Include relevant YouTube videos if educational.
4. Summarize in **point-wise format**, highlighting tools, frameworks, and recommendations.
5. Keep it concise, actionable, and readable.
6. Return only **final output**, no internal reasoning.
"""
)

# ─────────────────────────────────────────────────────────────────────────────
# Dynamic Router Node
# ─────────────────────────────────────────────────────────────────────────────

def dynamic_router_node(state: MessagesState) -> MessagesState:
    last_user = ""
    for m in reversed(state["messages"]):
        if getattr(m, "role", "") == "user":
            last_user = m.content
            break

    prompt = f"""
You are an intelligent router for a multi-agent system.
Analyze the user's query and determine which agents to invoke.
Return **JSON array of agent names** in order (any of: curriculum_agent, debug_agent, resume_agent, unified_agent, news_agent).

Example:
- 'Create resume' -> ["resume_agent"]
- 'Review my resume' -> ["debug_agent"]
- 'Explain OOP and give roadmap' -> ["curriculum_agent"]

User Query:
\"\"\"{last_user}\"\"\"

Agents JSON:
"""
    try:
        response = hf_model.invoke([HumanMessage(content=prompt)]).content.strip()
        agents_to_call = json.loads(response)
        if not isinstance(agents_to_call, list) or not agents_to_call:
            agents_to_call = ["curriculum_agent"]
    except Exception:
        agents_to_call = ["curriculum_agent"]

    # Add a human message marker for each agent
    for agent_name in agents_to_call:
        state["messages"].append(HumanMessage(content=f"[CALL_AGENT]{agent_name}"))

    return state

# ─────────────────────────────────────────────────────────────────────────────
# Graph Network
# ─────────────────────────────────────────────────────────────────────────────

builder = StateGraph(MessagesState)
builder.add_node("router", dynamic_router_node)
builder.add_node("curriculum_agent", curriculum_agent)
builder.add_node("debug_agent", debug_agent)
builder.add_node("resume_agent", resume_agent)
builder.add_node("unified_agent", unified_agent)
builder.add_node("news_agent", news_agent)

builder.add_edge(START, "router")

# Conditional edges: router → agents based on HF decision
builder.add_conditional_edges(
    "router",
    lambda state: [msg.content.replace("[CALL_AGENT]", "") for msg in state["messages"] if "[CALL_AGENT]" in msg.content],
    {
        "curriculum_agent": "curriculum_agent",
        "debug_agent": "debug_agent",
        "resume_agent": "resume_agent",
        "unified_agent": "unified_agent",
        "news_agent": "news_agent",
    },
)

for agent in ["curriculum_agent", "debug_agent", "resume_agent", "unified_agent", "news_agent"]:
    builder.add_edge(agent, END)

network_app = builder.compile()

# ─────────────────────────────────────────────────────────────────────────────
# Example usage
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    queries = [
        "Create a resume and cover letter for Software Engineer",
        "Review my resume for improvements",
        "Explain Object-Oriented Programming and provide learning roadmap",
        "Latest trends in AI technologies"
    ]
    for q in queries:
        result = network_app.invoke({"messages": [{"role": "user", "content": q}]})
        for m in result["messages"]:
            if hasattr(m, "content"):
                print(m.content)
                print("\n" + "="*60 + "\n")


ValueError: Function must have a docstring if description not provided.

In [248]:
unified_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_qa, semantic_scholar_research],
    name="unified_agent",  # <-- rename from "knowledge_agent"
    prompt="""
You are UNIFIED-AGENT, capable of handling YouTube transcript QA and research paper summarization.
- If a valid YouTube link is provided, use youtube_qa to answer or summarize the transcript.
- If a research query or PDF link is provided, use semantic_scholar_research to fetch and summarize papers.
- Indicate the source clearly (video or research paper).
- If info is unavailable, say so explicitly.
- Always provide concise, clear, and user-friendly answers.
"""
)


In [241]:

curriculum_agent = create_react_agent(
    model=hf_model,
    tools=[duck_tool, youtube_search],
    name="curriculum_agent",
    prompt="""
You are CURRICULUM-GUIDE, a knowledgeable educational expert.

Rules:

1. Generate a **personalized learning roadmap** based on the user's query.
2. Always fetch **top 3 YouTube videos** for the topic using the `youtube_search` tool.
   - Include **full video title and URL**.
   - Do not truncate, summarize, or remove links.
3. If no official video is found, output: "No related videos found."
4. Include step-by-step guidance in the roadmap.
5. Return only the **final clean output**, do not include internal agent thoughts, actions, or debug messages.
6. Format the response like this:

- **Course/Topic Name:** [Name]
- **Description:** [Brief summary]
- **YouTube Videos:** 
    [Title 1] - [URL 1]
    [Title 2] - [URL 2]
    [Title 3] - [URL 3]
- **Learning Steps:** 
    1. Step one
    2. Step two
    3. Step three

Always use the **exact output from the youtube_search tool** under "YouTube Videos".
"""
)


In [234]:
debug_agent = create_react_agent(
    model=hf_model,
    tools=[youtube_search, duck_tool, tavile_tool],
    name="debug_educational_agent",
    prompt="""
You are CODE-TUTOR, a friendly and knowledgeable AI teacher and coding mentor. 
When a student asks a question or submits code, follow these rules:

1. **Explain concepts clearly** in an educational and beginner-friendly manner.
2. **Debug the code**: identify errors, explain why they occur, and suggest precise fixes.
3. **Provide learning resources**: include relevant YouTube links or tutorials . 
4. **Give examples, exercises, or mini-quizzes** when helpful to reinforce understanding.
5. **Motivate the student**: encourage practice, exploration, and curiosity.
6. **Step-by-step guidance**: never give one-line answers; always walk the student through solutions.
7. **Format output clearly**: use numbered steps, bullet points, or code blocks where applicable.

Goal: Deliver comprehensive, interactive, and motivational educational guidance with youtube video.
"""
)


In [235]:
resume_agent = create_react_agent(
    model=hf_model,
    tools=[generate_resume],
    name="resume_agent",
    prompt="""
You are RESUME-GUIDE, an expert in generating resumes and cover letters. Your task is to produce
a high-quality, professional, structured resume and cover letter for the given job description.

Rules:
1. Use the provided generate_resume tool for producing HTML output.
2. Tailor content dynamically to match the job description.
3. Ensure output is balanced: concise yet informative, highlighting key skills, achievements, and fit.
4. Internally iterate if needed to refine clarity, professionalism, and formatting.
5. Return only the **final polished HTML output** without any internal messages.

Sections to include:
- Summary
- Skills
- Experience
- Cover Letter

Goal: Generate a professional, polished, and ready-to-use resume + cover letter.
"""
)


In [236]:
news_agent = create_react_agent(
    model=hf_model,
    tools=[tavile_tool, duck_tool, youtube_search],
    name="news_agent",
    prompt="""
You are TECH-TREND-GUIDE, a proactive and knowledgeable technology analyst. Your task:

1. Fetch the latest and trending technology news from multiple sources dynamically.
2. Summarize each news point clearly in **point-wise format** for quick reading.
3. Highlight **emerging tools, frameworks, and technologies** mentioned or relevant.
4. Include **YouTube videos** if they are educational, relevant, and verified. If no video is available, skip gracefully.
5. Provide **actionable insights or recommendations** wherever possible.
6. Decide intelligently which tool (tavile_tool, duck_tool, or youtube_search) to use for each query.
7. Keep the summary **concise, engaging, and readable**; avoid overly long paragraphs.
8. Return **only the final summarized news**, do not include internal reasoning or tool calls.

Example output format:
- News Point 1: [Brief summary]  
  Video (if available): [YouTube link]  
- News Point 2: [Brief summary]  
  Video (if available): [YouTube link]  
- Trending Tools/Tech: [List of tools or technologies]  
- Recommendation: [Optional actionable insight]

Focus on delivering a **dynamic, insightful, and agentic summary of tech trends**, with links only when they add real value.
"""
)


In [249]:
workflow = create_supervisor(
    [
        curriculum_agent,
        debug_agent,
        resume_agent,
        unified_agent,  # Handles YouTube QA & research papers
        news_agent
    ],
    model=hf_model,
    prompt="""
You are SUPERVISOR-GUIDE. Route user queries to the correct agent and return a concise, clear, final response.
Always provide structured, user-friendly output with valid links where applicable.
"""
)


In [250]:
app = workflow.compile()

In [244]:
student_query = (
        """Now based on this job description create a resume ,description=[About the Internship

Are you fascinated by data, machine learning, and AI-driven solutions? Do you want to gain hands-on experience in solving real-world business challenges using cutting-edge technology? As a Data Science Intern, you’ll work on projects that directly impact business decisions in the AdTech ecosystem, customer analytics, and generative AI applications.


This internship is designed for freshers who are eager to apply their academic knowledge to industry problems. You’ll collaborate with data scientists, engineers, and product managers to design models, analyze data, and deliver insights that power smarter strategies and solutions for clients.


Key Responsibilities

As part of the team, you’ll contribute to both research and development tasks, ensuring real business impact:

Business Translation: Work closely with stakeholders to translate business requirements into solvable data science problems.
Algorithm Development: Design and implement algorithms for multi-channel budget and bid optimization, particularly within the Walled Garden AdTech ecosystem.
Customer Analytics: Develop solutions for customer segmentation, churn prediction, and other predictive models using large-scale datasets.
Machine Learning: Build and train ML models using Python (Pandas, NumPy, Scikit-learn), focusing on real-world applications like ensemble methods, time series modeling, and boosting techniques.
Generative AI: Explore and implement solutions with Large Language Models (LLMs) and Generative AI tailored to client needs.
Continuous Improvement: Document, evaluate, and enhance algorithms and models for better performance and scalability.
Metrics & Outcomes: Define and track key performance indicators (KPIs) to measure the success of proposed solutions.
Collaboration: Partner with a cross-functional team of data scientists, engineers, and product managers throughout the product lifecycle.


Skills and Qualifications

We’re looking for motivated learners with a solid foundation in data science concepts:

Education: Bachelor’s degree in Computer Science, Statistics, Mathematics, or related field.
Programming: Proficiency in Python and experience with libraries such as Pandas, NumPy, and Scikit-learn.
SQL: Intermediate-level skills to query, manage, and analyze large datasets.
Machine Learning Knowledge: Familiarity with techniques such as regularization, boosting, random forests, ensemble methods, and time series modeling.
AI/ML Exposure: Prior experience or academic projects with LLMs and Generative AI.
Additional Plus: Knowledge of REST APIs, web services, and production-grade model deployment.
Soft Skills: Strong problem-solving mindset, excellent written and verbal communication, and the ability to work effectively in a team.
]"""
    )
    
result = app.invoke({
        "messages": [{"role": "user", "content": student_query}]
    })
    

In [245]:
for msg in result["messages"]:
    msg.pretty_print()



Now based on this job description create a resume ,description=[About the Internship

Are you fascinated by data, machine learning, and AI-driven solutions? Do you want to gain hands-on experience in solving real-world business challenges using cutting-edge technology? As a Data Science Intern, you’ll work on projects that directly impact business decisions in the AdTech ecosystem, customer analytics, and generative AI applications.


This internship is designed for freshers who are eager to apply their academic knowledge to industry problems. You’ll collaborate with data scientists, engineers, and product managers to design models, analyze data, and deliver insights that power smarter strategies and solutions for clients.


Key Responsibilities

As part of the team, you’ll contribute to both research and development tasks, ensuring real business impact:

Business Translation: Work closely with stakeholders to translate business requirements into solvable data science problems.
Algori

In [251]:
if __name__ == "__main__":
    
    student_query = (
        """Explain the difference between supervised and unsupervised machine learning with yutube video"""
    )
    
    result = app.invoke({
        "messages": [{"role": "user", "content": student_query}]
    })

In [252]:
# Suppose `result` is what you got from app.invoke(...)
for msg in result["messages"]:
    # Check if the message has 'content'
    if hasattr(msg, 'content'):
        print(f"{msg.name}:")  # Optional: show which agent
        print(msg.content)
        print("\n" + "="*80 + "\n")


None:
Explain the difference between supervised and unsupervised machine learning with yutube video


supervisor:



transfer_to_debug_educational_agent:
Successfully transferred to debug_educational_agent


debug_educational_agent:
**Supervised vs Unsupervised Machine Learning: A Clear Explanation**

In machine learning, we have two primary types of learning: supervised and unsupervised. Understanding the difference between these two is crucial for building accurate and effective models.

**Supervised Learning**
--------------------

Supervised learning is a type of machine learning where the model is trained on labeled data. This means that the data is already annotated with the correct output, and the model learns to map inputs to outputs based on these labels.

**Example:** Image classification, where the model is trained on images of dogs and cats, and the labels indicate whether the image is a dog or a cat.

**How it works:**

1. The model is trained on a dataset of labeled examp

In [None]:
None:
Explain the difference between supervised and unsupervised machine learning with examples

================================================================================

supervisor:


================================================================================

transfer_to_curriculum_agent:
Successfully transferred to curriculum_agent

================================================================================

curriculum_agent:
**Supervised vs Unsupervised Machine Learning**

Machine learning is a subset of artificial intelligence that enables computers to learn from data without being explicitly programmed. There are two primary types of machine learning: supervised and unsupervised.

**Supervised Machine Learning**

In supervised machine learning, the algorithm is trained on labeled data, where the correct output is already known. The goal is to learn a mapping between input data and output labels, so the algorithm can make predictions on new, unseen data.

Example:

* Image classification: Train a model on a dataset of labeled images (e.g., dogs, cats, cars) to learn the features that distinguish each class.
* Sentiment analysis: Train a model on a dataset of labeled text (e.g., positive, negative, neutral) to learn the features that indicate the sentiment of a piece of text.

**Unsupervised Machine Learning**

In unsupervised machine learning, the algorithm is trained on unlabeled data, and the goal is to discover patterns, relationships, or structure in the data.

Example:

* Clustering: Group similar data points into clusters based on their features (e.g., customer segmentation).
* Dimensionality reduction: Reduce the number of features in a dataset while preserving the most important information (e.g., PCA).

**Key differences:**

* **Labeled data**: Supervised learning requires labeled data, while unsupervised learning does not.
* **Goal**: Supervised learning aims to make predictions, while unsupervised learning aims to discover patterns or structure.
* **Algorithmic approach**: Supervised learning typically involves regression or classification algorithms, while unsupervised learning involves clustering, dimensionality reduction, or density estimation algorithms.

**YouTube Videos:**

* "Supervised vs Unsupervised Machine Learning" by Siraj Raval (3:34 minutes)
* "Unsupervised Machine Learning" by 3Blue1Brown (10:45 minutes)
* "Supervised vs Unsupervised Learning" by DataCamp (4:21 minutes)

**Learning Steps:**

1. Understand the concept of labeled and unlabeled data.
2. Learn the differences between supervised and unsupervised machine learning.
3. Study the examples and applications of each type of machine learning.
4. Practice implementing supervised and unsupervised algorithms using popular libraries like scikit-learn or TensorFlow.
5. Experiment with real-world datasets to apply your knowledge and gain hands-on experience.

================================================================================

curriculum_agent:
Transferring back to supervisor

================================================================================

transfer_back_to_supervisor:
Successfully transferred back to supervisor

================================================================================

supervisor:
It looks like you're ready to move on to the next topic. What would you like to explore next?

================================================================================


In [None]:
workflow = create_supervisor(
    [
        curriculum_agent, debug_agent,
        resume_agent, unified_agent, news_agent
    ],
    model=hf_model,
    prompt="""
You are SUPERVISOR-GUIDE, responsible for routing user queries to the appropriate specialized agent
(curriculum, debugging, resume/cover letter, feedback, tech news) and producing a final, 
well-formatted, concise, and complete response.

**Important:** For **any educational, conceptual, or curriculum-related query**, you must:

1. Provide a step-by-step explanation with examples.
2. Dynamically fetch up to 3 relevant YouTube videos using the `youtube_search` tool.
3. Include **only valid video titles and URLs** returned by the `youtube_search` tool.
4. If no videos are found, include this message: "No relevant video found; refer to online resources."
5. Format explanations clearly using bullet points, numbered lists, or code blocks.

Follow these rules:

1. **Identify intent**: Analyze the query and route it to the correct agent(s). Use multiple agents sequentially if needed.

2. **Curriculum / Learning queries**:
   - Fetch top relevant courses dynamically from websites.
   - Attach YouTube video links using `youtube_search`.
   - Present in **clean, point-wise format**: Course Name – Video Link

3. **Educational / Conceptual queries**:
   - Forward to `debug_agent` or handle directly.
   - Include step-by-step explanations and examples.
   - Attach **YouTube videos dynamically** using `youtube_search`.

4. **Debugging / Skills queries**: Forward to `debug_agent`. Include explanations, suggested fixes, examples, and exercises if relevant.

5. **Resume / Cover letter queries**: Forward to `resume_agent`. Ensure output is professional, concise, and structured (HTML or readable text).

6. **Feedback / Reviews**: Forward to `feedback_agent`. Responses should be friendly, motivational, and context-aware.

7. **Technology News**: Forward to `news_agent`. Summarize in **point-wise format**, including trending tools or technologies. Keep it concise and actionable.

8. **Formatting & Clarity**:
   - Return **only the final output**; do not include internal agent steps.
   - Include lists and links **only if valid**.
   - Ensure readability: not too short, not too long.

9. **Dynamic Handling**:
   - Prefer real-time data retrieval via web search or APIs.
   - Fall back gracefully if data is unavailable.

Goal: Provide **accurate, complete, user-friendly, and visually clear responses** while coordinating all agents efficiently.
"""
)


In [None]:
1. No title (Unkn)
   Authors: 
   Summary:  This research abstract is a review of recent Deep Learning advances in text summarization, an NLP task that condenses text into shorter forms while retaining meaning. It highlights the use of pointer...
   Link: None

2. No title (Unkn)
   Authors: 
   Summary:  This research examines the performance of popular transformer-based NLP models when fine-tuned on noisy text, specifically looking at spelling mistakes and typos. The findings suggest that these mode...
   Link: None

3. No title (Unkn)
   Authors: 
   Summary:  The abstract describes research that summarizes and examines current state-of-the-art NLP models, which have achieved success in various linguistic and semantic tasks through the Transformer architec...
   Link: None

4. No title (Unkn)
   Authors: 
   Summary:  This study compares the performance of NLP-based models and CNN-based models in predicting biomarkers in colorectal cancer using digital pathology images. The results showed that NLP-based models, sp...
   Link: None

5. No title (Unkn)
   Authors: 
   Summary:  The Transformer architecture and pre-trained models like BERT have significantly advanced NLP, but limitations remain in modeling certain kinds of information. This research shows that addressing the...
   Link: None


In [None]:

# # -------------------------
# # PDF Extraction Function
# # -------------------------
# def extract_text_from_pdf(url: str, max_pages: int = 5) -> str:
#     try:
#         response = requests.get(url, timeout=15)
#         if response.status_code != 200:
#             return ""
#         pdf_file = BytesIO(response.content)
#         reader = PdfReader(pdf_file)
#         text = [page.extract_text() or "" for page in reader.pages[:max_pages]]
#         return "\n".join(text)
#     except Exception as e:
#         return f"Error extracting PDF: {e}"

# # -------------------------
# # Initialize Semantic Scholar API
# # -------------------------
# ss = SemanticScholarAPIWrapper(
#     top_k_results=5,  # Number of top papers to fetch
#     load_max_docs=5,  # Limit of loaded documents
# )

# # -------------------------
# # Function to fetch & summarize papers
# # -------------------------
# def fetch_and_summarize(query: str, summarize: bool = True) -> List[Dict]:
#     raw_results = ss.run(query)  # Returns string with paper info
#     # Normally, you would parse JSON if you need structured data
#     # For demonstration, we split papers by double newlines
#     papers = raw_results.split("\n\n")
#     summarized_papers = []

#     for paper in papers:
#         # Optionally summarize abstract with LLM
#         summary = ""
#         if summarize and "abstract:" in paper.lower():
#             prompt = f"Summarize this research abstract in 2-3 sentences:\n\n{paper}"
#             summary = hf_model.invoke([HumanMessage(content=prompt)]).content
#         summarized_papers.append({
#             "raw_info": paper,
#             "summary": summary
#         })

#     return summarized_papers

Paper 1:
Raw Info: Published year: 2023
Title: Divide et Impera: Multi-Transformer Architectures for Complex NLP-Tasks
Authors: Solveig Helland, Elena Gavagnin, Alexandre de Spindler
Abstract: The growing capabilities of transformer models pave the way for solving increasingly complex NLP tasks. A key to supporting application-specific requirements is the ability to fine-tune. However, compiling a fine-tuning dataset tailored to complex tasks is tedious and results in large datasets, limiting the ability to control transformer output. We present an approach in which complex tasks are divided into simpler subtasks. Multiple transformer models are fine-tuned to one subtask each, and lined up to accomplish the complex task. This simplifies the compilation of fine-tuning datasets and increases overall controllability. Using the example of reducing gender bias as a complex task, we demonstrate our approach and show that it performs better than using a single model.
Summary:  This research, 

In [88]:
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled, VideoUnavailable
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# -----------------------------
# Initialize LLM
# -----------------------------
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    task="text-generation",
    do_sample=False,
    temperature=0.1
)
model = ChatHuggingFace(llm=llm)

# -----------------------------
# Prompt Template
# -----------------------------
prompt_template = """
You are a helpful assistant designed to answer questions about a YouTube video based on its transcript.
Answer the user's question using ONLY the provided transcript context.
If the information is not in the context, explicitly say "I cannot find information about that in the video transcript."

Transcript:
{context}

Question:
{question}

Answer:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# -----------------------------
# Transcript Fetcher
# -----------------------------
def get_transcript(video_id: str):
    """Fetch transcript safely for old and new versions of youtube_transcript_api."""
    try:
        transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["hi"])
    except NoTranscriptFound:
        try:
            transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
        except NoTranscriptFound:
            return None, "No transcript available in Hindi or English."
    except (TranscriptsDisabled, VideoUnavailable) as e:
        return None, str(e)
    except Exception as e:
        return None, f"Unexpected error: {str(e)}"

    # Handle both dicts and FetchedTranscriptSnippet objects
    texts = []
    for snippet in transcript_list:
        if isinstance(snippet, dict):
            texts.append(snippet.get("text", ""))
        else:
            # FetchedTranscriptSnippet object
            texts.append(getattr(snippet, "text", ""))

    transcript = " ".join(texts)
    return transcript, None


# -----------------------------
# Main YouTube QA / Summarizer
# -----------------------------
def youtube_qa(video_url: str, question: str):
    """Given a YouTube URL and question, return answer based on transcript."""
    try:
        video_id = video_url.split("v=")[-1].split("&")[0]  # Extract only the video ID
        transcript, error = get_transcript(video_id)
        if error:
            return error

        rag_runnable = (
            {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
            | prompt
            | model
        )
        answer = rag_runnable.invoke({"context": transcript, "question": question})
        return answer

    except Exception as e:
        return f"Error: {str(e)}"

# -----------------------------
# Example usage
# -----------------------------
video_url = "https://www.youtube.com/watch?v=QsYGlZkevEg"
question = "What is the main topic of this video?"
print(youtube_qa(video_url, question))


content=' The main topic of this video is Pedro Pascal\'s career, his experiences shooting different shows like "The Last of Us" on HBO, "Game of Thrones," and "The Mandalorian," as well as his family background and early career in Chile and the United States.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 62, 'prompt_tokens': 2206, 'total_tokens': 2268}, 'model_name': 'mistralai/Mixtral-8x7B-Instruct-v0.1', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run--57002112-6597-4390-9779-a503a313ac28-0' usage_metadata={'input_tokens': 2206, 'output_tokens': 62, 'total_tokens': 2268}
